diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-10 13:56:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-10 13:56:13 -0700 |
commit | 1b21f458ddbc8fb6fceeb68158e9e04b2571dabd (patch) | |
tree | 6ad7a02eba52a17e7a5d2e5de07b2918705c97bb | |
parent | 01370f0603f8435d415a19f7e62d1bab826c3589 (diff) | |
parent | 3ebf44902f77537b5784eb5059c2b78d8b5a920a (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits)
[GFS2] Accept old format NFS filehandles
[GFS2] Small fixes to logging code
[DLM] dump more lock values
[GFS2] Remove i_mode passing from NFS File Handle
[GFS2] Obtaining no_formal_ino from directory entry
[GFS2] git-gfs2-nmw-build-fix
[GFS2] System won't suspend with GFS2 file system mounted
[GFS2] remounting w/o acl option leaves acls enabled
[GFS2] inode size inconsistency
[DLM] Telnet to port 21064 can stop all lockspaces
[GFS2] Fix gfs2_block_truncate_page err return
[GFS2] Addendum to the journaled file/unmount patch
[GFS2] Simplify multiple glock aquisition
[GFS2] assertion failure after writing to journaled file, umount
[GFS2] Use zero_user_page() in stuffed_readpage()
[GFS2] Remove bogus '\0' in rgrp.c
[GFS2] Journaled file write/unstuff bug
[DLM] don't require FS flag on all nodes
[GFS2] Fix deallocation issues
[GFS2] return conflicts for GETLK
...
62 files changed, 2249 insertions, 1107 deletions
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 604cf7dc5f3..d248e60951b 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -8,6 +8,7 @@ dlm-y := ast.o \ member.o \ memory.o \ midcomms.o \ + netlink.o \ lowcomms.o \ rcom.o \ recover.o \ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 822abdcd143..5069b2cb5a1 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -90,6 +90,7 @@ struct cluster { unsigned int cl_scan_secs; unsigned int cl_log_debug; unsigned int cl_protocol; + unsigned int cl_timewarn_cs; }; enum { @@ -103,6 +104,7 @@ enum { CLUSTER_ATTR_SCAN_SECS, CLUSTER_ATTR_LOG_DEBUG, CLUSTER_ATTR_PROTOCOL, + CLUSTER_ATTR_TIMEWARN_CS, }; struct cluster_attribute { @@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1); CLUSTER_ATTR(scan_secs, 1); CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); +CLUSTER_ATTR(timewarn_cs, 1); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, + [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, NULL, }; @@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_toss_secs = dlm_config.ci_toss_secs; cl->cl_scan_secs = dlm_config.ci_scan_secs; cl->cl_log_debug = dlm_config.ci_log_debug; + cl->cl_protocol = dlm_config.ci_protocol; + cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) static struct space *get_space(char *name) { + struct config_item *i; + if (!space_list) return NULL; - return to_space(config_group_find_obj(space_list, name)); + + down(&space_list->cg_subsys->su_sem); + i = config_group_find_obj(space_list, name); + up(&space_list->cg_subsys->su_sem); + + return to_space(i); } static void put_space(struct space *sp) @@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) if (cm->nodeid != nodeid) continue; found = 1; + config_item_get(i); break; } else { if (!cm->addr_count || memcmp(cm->addr[0], addr, sizeof(*addr))) continue; found = 1; + config_item_get(i); break; } } up(&clusters_root.subsys.su_sem); - if (found) - config_item_get(i); - else + if (!found) cm = NULL; return cm; } @@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_SCAN_SECS 5 #define DEFAULT_LOG_DEBUG 0 #define DEFAULT_PROTOCOL 0 +#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = { .ci_toss_secs = DEFAULT_TOSS_SECS, .ci_scan_secs = DEFAULT_SCAN_SECS, .ci_log_debug = DEFAULT_LOG_DEBUG, - .ci_protocol = DEFAULT_PROTOCOL + .ci_protocol = DEFAULT_PROTOCOL, + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 967cc3d72e5..a3170fe2209 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -27,6 +27,7 @@ struct dlm_config_info { int ci_scan_secs; int ci_log_debug; int ci_protocol; + int ci_timewarn_cs; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 61ba670b9e0..12c3bfd5e66 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include "dlm_internal.h" +#include "lock.h" #define DLM_DEBUG_BUF_LEN 4096 static char debug_buf[DLM_DEBUG_BUF_LEN]; @@ -26,6 +27,8 @@ static struct dentry *dlm_root; struct rsb_iter { int entry; + int locks; + int header; struct dlm_ls *ls; struct list_head *next; struct dlm_rsb *rsb; @@ -57,8 +60,8 @@ static char *print_lockmode(int mode) } } -static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, - struct dlm_rsb *res) +static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb, + struct dlm_rsb *res) { seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); @@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) struct dlm_lkb *lkb; int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; + lock_rsb(res); + seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); for (i = 0; i < res->res_length; i++) { if (isprint(res->res_name[i])) @@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) /* Print the locks attached to this resource */ seq_printf(s, "Granted Queue\n"); list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Conversion Queue\n"); list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Waiting Queue\n"); list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); if (list_empty(&res->res_lookup)) goto out; @@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) seq_printf(s, "\n"); } out: + unlock_rsb(res); + return 0; +} + +static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r) +{ + struct dlm_user_args *ua; + unsigned int waiting = 0; + uint64_t xid = 0; + + if (lkb->lkb_flags & DLM_IFL_USER) { + ua = (struct dlm_user_args *) lkb->lkb_astparam; + if (ua) + xid = ua->xid; + } + + if (lkb->lkb_timestamp) + waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp); + + /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms + r_nodeid r_len r_name */ + + seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n", + lkb->lkb_id, + lkb->lkb_nodeid, + lkb->lkb_remid, + lkb->lkb_ownpid, + (unsigned long long)xid, + lkb->lkb_exflags, + lkb->lkb_flags, + lkb->lkb_status, + lkb->lkb_grmode, + lkb->lkb_rqmode, + waiting, + r->res_nodeid, + r->res_length, + r->res_name); +} + +static int print_locks(struct dlm_rsb *r, struct seq_file *s) +{ + struct dlm_lkb *lkb; + + lock_rsb(r); + + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) + print_lock(s, lkb, r); + + unlock_rsb(r); return 0; } @@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri) read_lock(&ls->ls_rsbtbl[i].lock); if (!list_empty(&ls->ls_rsbtbl[i].list)) { ri->next = ls->ls_rsbtbl[i].list.next; + ri->rsb = list_entry(ri->next, struct dlm_rsb, + res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); break; } @@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri) if (ri->entry >= ls->ls_rsbtbl_size) return 1; } else { + struct dlm_rsb *old = ri->rsb; i = ri->entry; read_lock(&ls->ls_rsbtbl[i].lock); ri->next = ri->next->next; @@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri) ri->next = NULL; ri->entry++; read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); goto top; } + ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); } - ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); return 0; } @@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls) { struct rsb_iter *ri; - ri = kmalloc(sizeof *ri, GFP_KERNEL); + ri = kzalloc(sizeof *ri, GFP_KERNEL); if (!ri) return NULL; @@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr) { struct rsb_iter *ri = iter_ptr; - print_resource(ri->rsb, file); + if (ri->locks) { + if (ri->header) { + seq_printf(file, "id nodeid remid pid xid exflags flags " + "sts grmode rqmode time_ms r_nodeid " + "r_len r_name\n"); + ri->header = 0; + } + print_locks(ri->rsb, file); + } else { + print_resource(ri->rsb, file); + } return 0; } @@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = { }; /* + * Dump state in compact per-lock listing + */ + +static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos) +{ + struct rsb_iter *ri; + + ri = kzalloc(sizeof *ri, GFP_KERNEL); + if (!ri) + return NULL; + + ri->ls = ls; + ri->entry = 0; + ri->next = NULL; + ri->locks = 1; + + if (*pos == 0) + ri->header = 1; + + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + + return ri; +} + +static void *locks_seq_start(struct seq_file *file, loff_t *pos) +{ + struct rsb_iter *ri; + loff_t n = *pos; + + ri = locks_iter_init(file->private, pos); + if (!ri) + return NULL; + + while (n--) { + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + } + + return ri; +} + +static struct seq_operations locks_seq_ops = { + .start = locks_seq_start, + .next = rsb_seq_next, + .stop = rsb_seq_stop, + .show = rsb_seq_show, +}; + +static int locks_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &locks_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations locks_fops = { + .owner = THIS_MODULE, + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* * dump lkb's on the ls_waiters list */ @@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls) return -ENOMEM; } + memset(name, 0, sizeof(name)); + snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name); + + ls->ls_debug_locks_dentry = debugfs_create_file(name, + S_IFREG | S_IRUGO, + dlm_root, + ls, + &locks_fops); + if (!ls->ls_debug_locks_dentry) { + debugfs_remove(ls->ls_debug_waiters_dentry); + debugfs_remove(ls->ls_debug_rsb_dentry); + return -ENOMEM; + } + return 0; } @@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_rsb_dentry); if (ls->ls_debug_waiters_dentry) debugfs_remove(ls->ls_debug_waiters_dentry); + if (ls->ls_debug_locks_dentry) + debugfs_remove(ls->ls_debug_locks_dentry); } int dlm_register_debugfs(void) diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 30994d68f6a..74901e981e1 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -151,6 +151,7 @@ struct dlm_args { void *bastaddr; int mode; struct dlm_lksb *lksb; + unsigned long timeout; }; @@ -213,6 +214,9 @@ struct dlm_args { #define DLM_IFL_OVERLAP_UNLOCK 0x00080000 #define DLM_IFL_OVERLAP_CANCEL 0x00100000 #define DLM_IFL_ENDOFLIFE 0x00200000 +#define DLM_IFL_WATCH_TIMEWARN 0x00400000 +#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 +#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 @@ -243,6 +247,9 @@ struct dlm_lkb { struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ + struct list_head lkb_time_list; + unsigned long lkb_timestamp; + unsigned long lkb_timeout_cs; char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ @@ -447,12 +454,16 @@ struct dlm_ls { struct mutex ls_orphans_mutex; struct list_head ls_orphans; + struct mutex ls_timeout_mutex; + struct list_head ls_timeout; + struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ int ls_low_nodeid; int ls_total_weight; int *ls_node_array; + gfp_t ls_allocation; struct dlm_rsb ls_stub_rsb; /* for returning errors */ struct dlm_lkb ls_stub_lkb; /* for returning errors */ @@ -460,9 +471,12 @@ struct dlm_ls { struct dentry *ls_debug_rsb_dentry; /* debugfs */ struct dentry *ls_debug_waiters_dentry; /* debugfs */ + struct dentry *ls_debug_locks_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; + struct completion ls_members_done; + int ls_members_result; struct miscdevice ls_device; @@ -472,6 +486,7 @@ struct dlm_ls { struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; spinlock_t ls_recover_lock; + unsigned long ls_recover_begin; /* jiffies timestamp */ uint32_t ls_recover_status; /* DLM_RS_ */ uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; @@ -501,6 +516,7 @@ struct dlm_ls { #define LSFL_RCOM_READY 3 #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 +#define LSFL_TIMEWARN 6 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ @@ -518,6 +534,7 @@ struct dlm_user_args { void __user *castaddr; void __user *bastparam; void __user *bastaddr; + uint64_t xid; }; #define DLM_PROC_FLAGS_CLOSING 1 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d8d6e729f96..b455919c199 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); static int send_remove(struct dlm_rsb *r); static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms); static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); +static void del_timeout(struct dlm_lkb *lkb); +void dlm_timeout_warn(struct dlm_lkb *lkb); /* * Lock compatibilty matrix - thanks Steve @@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r) /* Threads cannot use the lockspace while it's being recovered */ -static inline void lock_recovery(struct dlm_ls *ls) +static inline void dlm_lock_recovery(struct dlm_ls *ls) { down_read(&ls->ls_in_recovery); } -static inline void unlock_recovery(struct dlm_ls *ls) +void dlm_unlock_recovery(struct dlm_ls *ls) { up_read(&ls->ls_in_recovery); } -static inline int lock_recovery_try(struct dlm_ls *ls) +int dlm_lock_recovery_try(struct dlm_ls *ls) { return down_read_trylock(&ls->ls_in_recovery); } @@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) if (is_master_copy(lkb)) return; + del_timeout(lkb); + DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); + /* if the operation was a cancel, then return -DLM_ECANCEL, if a + timeout caused the cancel then return -ETIMEDOUT */ + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL; + rv = -ETIMEDOUT; + } + + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL; + rv = -EDEADLK; + } + lkb->lkb_lksb->sb_status = rv; lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; @@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) kref_init(&lkb->lkb_ref); INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); + INIT_LIST_HEAD(&lkb->lkb_time_list); get_random_bytes(&bucket, sizeof(bucket)); bucket &= (ls->ls_lkbtbl_size - 1); @@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls) { int i; - if (dlm_locking_stopped(ls)) - return; - for (i = 0; i < ls->ls_rsbtbl_size; i++) { shrink_bucket(ls, i); + if (dlm_locking_stopped(ls)) + break; cond_resched(); } } +static void add_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + if (is_master_copy(lkb)) { + lkb->lkb_timestamp = jiffies; + return; + } + + if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && + !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { + lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; + goto add_it; + } + if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) + goto add_it; + return; + + add_it: + DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb);); + mutex_lock(&ls->ls_timeout_mutex); + hold_lkb(lkb); + lkb->lkb_timestamp = jiffies; + list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout); + mutex_unlock(&ls->ls_timeout_mutex); +} + +static void del_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + mutex_lock(&ls->ls_timeout_mutex); + if (!list_empty(&lkb->lkb_time_list)) { + list_del_init(&lkb->lkb_time_list); + unhold_lkb(lkb); + } + mutex_unlock(&ls->ls_timeout_mutex); +} + +/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and + lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex + and then lock rsb because of lock ordering in add_timeout. We may need + to specify some special timeout-related bits in the lkb that are just to + be accessed under the timeout_mutex. */ + +void dlm_scan_timeout(struct dlm_ls *ls) +{ + struct dlm_rsb *r; + struct dlm_lkb *lkb; + int do_cancel, do_warn; + + for (;;) { + if (dlm_locking_stopped(ls)) + break; + + do_cancel = 0; + d |