diff options
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/Kconfig | 2 | ||||
-rw-r--r-- | fs/dlm/Makefile | 1 | ||||
-rw-r--r-- | fs/dlm/config.c | 39 | ||||
-rw-r--r-- | fs/dlm/config.h | 1 | ||||
-rw-r--r-- | fs/dlm/debug_fs.c | 186 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 17 | ||||
-rw-r--r-- | fs/dlm/lock.c | 470 | ||||
-rw-r--r-- | fs/dlm/lock.h | 13 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 86 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 23 | ||||
-rw-r--r-- | fs/dlm/main.c | 11 | ||||
-rw-r--r-- | fs/dlm/member.c | 11 | ||||
-rw-r--r-- | fs/dlm/netlink.c | 153 | ||||
-rw-r--r-- | fs/dlm/rcom.c | 13 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 4 | ||||
-rw-r--r-- | fs/dlm/user.c | 129 |
16 files changed, 968 insertions, 191 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 69a94690e49..54bcc00ec8d 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -3,7 +3,7 @@ menu "Distributed Lock Manager" config DLM tristate "Distributed Lock Manager (DLM)" - depends on IPV6 || IPV6=n + depends on SYSFS && (IPV6 || IPV6=n) select CONFIGFS_FS select IP_SCTP help diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 604cf7dc5f3..d248e60951b 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -8,6 +8,7 @@ dlm-y := ast.o \ member.o \ memory.o \ midcomms.o \ + netlink.o \ lowcomms.o \ rcom.o \ recover.o \ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 822abdcd143..2f8e3c81bc1 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -90,6 +90,7 @@ struct cluster { unsigned int cl_scan_secs; unsigned int cl_log_debug; unsigned int cl_protocol; + unsigned int cl_timewarn_cs; }; enum { @@ -103,6 +104,7 @@ enum { CLUSTER_ATTR_SCAN_SECS, CLUSTER_ATTR_LOG_DEBUG, CLUSTER_ATTR_PROTOCOL, + CLUSTER_ATTR_TIMEWARN_CS, }; struct cluster_attribute { @@ -131,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, return len; } -#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ - .attr = { .ca_name = __stringify(_name), \ - .ca_mode = _mode, \ - .ca_owner = THIS_MODULE }, \ - .show = _read, \ - .store = _write, \ -} - #define CLUSTER_ATTR(name, check_zero) \ static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ { \ @@ -162,6 +156,7 @@ CLUSTER_ATTR(toss_secs, 1); CLUSTER_ATTR(scan_secs, 1); CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); +CLUSTER_ATTR(timewarn_cs, 1); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -174,6 +169,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, + [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, NULL, }; @@ -429,6 +425,8 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_toss_secs = dlm_config.ci_toss_secs; cl->cl_scan_secs = dlm_config.ci_scan_secs; cl->cl_log_debug = dlm_config.ci_log_debug; + cl->cl_protocol = dlm_config.ci_protocol; + cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -609,7 +607,7 @@ static struct clusters clusters_root = { int dlm_config_init(void) { config_group_init(&clusters_root.subsys.su_group); - init_MUTEX(&clusters_root.subsys.su_sem); + mutex_init(&clusters_root.subsys.su_mutex); return configfs_register_subsystem(&clusters_root.subsys); } @@ -748,9 +746,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) static struct space *get_space(char *name) { + struct config_item *i; + if (!space_list) return NULL; - return to_space(config_group_find_obj(space_list, name)); + + mutex_lock(&space_list->cg_subsys->su_mutex); + i = config_group_find_item(space_list, name); + mutex_unlock(&space_list->cg_subsys->su_mutex); + + return to_space(i); } static void put_space(struct space *sp) @@ -767,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) if (!comm_list) return NULL; - down(&clusters_root.subsys.su_sem); + mutex_lock(&clusters_root.subsys.su_mutex); list_for_each_entry(i, &comm_list->cg_children, ci_entry) { cm = to_comm(i); @@ -776,20 +781,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) if (cm->nodeid != nodeid) continue; found = 1; + config_item_get(i); break; } else { if (!cm->addr_count || memcmp(cm->addr[0], addr, sizeof(*addr))) continue; found = 1; + config_item_get(i); break; } } - up(&clusters_root.subsys.su_sem); + mutex_unlock(&clusters_root.subsys.su_mutex); - if (found) - config_item_get(i); - else + if (!found) cm = NULL; return cm; } @@ -909,6 +914,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_SCAN_SECS 5 #define DEFAULT_LOG_DEBUG 0 #define DEFAULT_PROTOCOL 0 +#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -920,6 +926,7 @@ struct dlm_config_info dlm_config = { .ci_toss_secs = DEFAULT_TOSS_SECS, .ci_scan_secs = DEFAULT_SCAN_SECS, .ci_log_debug = DEFAULT_LOG_DEBUG, - .ci_protocol = DEFAULT_PROTOCOL + .ci_protocol = DEFAULT_PROTOCOL, + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 967cc3d72e5..a3170fe2209 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -27,6 +27,7 @@ struct dlm_config_info { int ci_scan_secs; int ci_log_debug; int ci_protocol; + int ci_timewarn_cs; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 61ba670b9e0..12c3bfd5e66 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include "dlm_internal.h" +#include "lock.h" #define DLM_DEBUG_BUF_LEN 4096 static char debug_buf[DLM_DEBUG_BUF_LEN]; @@ -26,6 +27,8 @@ static struct dentry *dlm_root; struct rsb_iter { int entry; + int locks; + int header; struct dlm_ls *ls; struct list_head *next; struct dlm_rsb *rsb; @@ -57,8 +60,8 @@ static char *print_lockmode(int mode) } } -static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, - struct dlm_rsb *res) +static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb, + struct dlm_rsb *res) { seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); @@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) struct dlm_lkb *lkb; int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; + lock_rsb(res); + seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); for (i = 0; i < res->res_length; i++) { if (isprint(res->res_name[i])) @@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) /* Print the locks attached to this resource */ seq_printf(s, "Granted Queue\n"); list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Conversion Queue\n"); list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Waiting Queue\n"); list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); if (list_empty(&res->res_lookup)) goto out; @@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) seq_printf(s, "\n"); } out: + unlock_rsb(res); + return 0; +} + +static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r) +{ + struct dlm_user_args *ua; + unsigned int waiting = 0; + uint64_t xid = 0; + + if (lkb->lkb_flags & DLM_IFL_USER) { + ua = (struct dlm_user_args *) lkb->lkb_astparam; + if (ua) + xid = ua->xid; + } + + if (lkb->lkb_timestamp) + waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp); + + /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms + r_nodeid r_len r_name */ + + seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n", + lkb->lkb_id, + lkb->lkb_nodeid, + lkb->lkb_remid, + lkb->lkb_ownpid, + (unsigned long long)xid, + lkb->lkb_exflags, + lkb->lkb_flags, + lkb->lkb_status, + lkb->lkb_grmode, + lkb->lkb_rqmode, + waiting, + r->res_nodeid, + r->res_length, + r->res_name); +} + +static int print_locks(struct dlm_rsb *r, struct seq_file *s) +{ + struct dlm_lkb *lkb; + + lock_rsb(r); + + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) + print_lock(s, lkb, r); + + unlock_rsb(r); return 0; } @@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri) read_lock(&ls->ls_rsbtbl[i].lock); if (!list_empty(&ls->ls_rsbtbl[i].list)) { ri->next = ls->ls_rsbtbl[i].list.next; + ri->rsb = list_entry(ri->next, struct dlm_rsb, + res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); break; } @@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri) if (ri->entry >= ls->ls_rsbtbl_size) return 1; } else { + struct dlm_rsb *old = ri->rsb; i = ri->entry; read_lock(&ls->ls_rsbtbl[i].lock); ri->next = ri->next->next; @@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri) ri->next = NULL; ri->entry++; read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); goto top; } + ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); } - ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); return 0; } @@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls) { struct rsb_iter *ri; - ri = kmalloc(sizeof *ri, GFP_KERNEL); + ri = kzalloc(sizeof *ri, GFP_KERNEL); if (!ri) return NULL; @@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr) { struct rsb_iter *ri = iter_ptr; - print_resource(ri->rsb, file); + if (ri->locks) { + if (ri->header) { + seq_printf(file, "id nodeid remid pid xid exflags flags " + "sts grmode rqmode time_ms r_nodeid " + "r_len r_name\n"); + ri->header = 0; + } + print_locks(ri->rsb, file); + } else { + print_resource(ri->rsb, file); + } return 0; } @@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = { }; /* + * Dump state in compact per-lock listing + */ + +static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos) +{ + struct rsb_iter *ri; + + ri = kzalloc(sizeof *ri, GFP_KERNEL); + if (!ri) + return NULL; + + ri->ls = ls; + ri->entry = 0; + ri->next = NULL; + ri->locks = 1; + + if (*pos == 0) + ri->header = 1; + + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + + return ri; +} + +static void *locks_seq_start(struct seq_file *file, loff_t *pos) +{ + struct rsb_iter *ri; + loff_t n = *pos; + + ri = locks_iter_init(file->private, pos); + if (!ri) + return NULL; + + while (n--) { + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + } + + return ri; +} + +static struct seq_operations locks_seq_ops = { + .start = locks_seq_start, + .next = rsb_seq_next, + .stop = rsb_seq_stop, + .show = rsb_seq_show, +}; + +static int locks_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &locks_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations locks_fops = { + .owner = THIS_MODULE, + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* * dump lkb's on the ls_waiters list */ @@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls) return -ENOMEM; } + memset(name, 0, sizeof(name)); + snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name); + + ls->ls_debug_locks_dentry = debugfs_create_file(name, + S_IFREG | S_IRUGO, + dlm_root, + ls, + &locks_fops); + if (!ls->ls_debug_locks_dentry) { + debugfs_remove(ls->ls_debug_waiters_dentry); + debugfs_remove(ls->ls_debug_rsb_dentry); + return -ENOMEM; + } + return 0; } @@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_rsb_dentry); if (ls->ls_debug_waiters_dentry) debugfs_remove(ls->ls_debug_waiters_dentry); + if (ls->ls_debug_locks_dentry) + debugfs_remove(ls->ls_debug_locks_dentry); } int dlm_register_debugfs(void) diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 30994d68f6a..74901e981e1 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -151,6 +151,7 @@ struct dlm_args { void *bastaddr; int mode; struct dlm_lksb *lksb; + unsigned long timeout; }; @@ -213,6 +214,9 @@ struct dlm_args { #define DLM_IFL_OVERLAP_UNLOCK 0x00080000 #define DLM_IFL_OVERLAP_CANCEL 0x00100000 #define DLM_IFL_ENDOFLIFE 0x00200000 +#define DLM_IFL_WATCH_TIMEWARN 0x00400000 +#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 +#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 @@ -243,6 +247,9 @@ struct dlm_lkb { struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ + struct list_head lkb_time_list; + unsigned long lkb_timestamp; + unsigned long lkb_timeout_cs; char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ @@ -447,12 +454,16 @@ struct dlm_ls { struct mutex ls_orphans_mutex; struct list_head ls_orphans; + struct mutex ls_timeout_mutex; + struct list_head ls_timeout; + struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ int ls_low_nodeid; int ls_total_weight; int *ls_node_array; + gfp_t ls_allocation; struct dlm_rsb ls_stub_rsb; /* for returning errors */ struct dlm_lkb ls_stub_lkb; /* for returning errors */ @@ -460,9 +471,12 @@ struct dlm_ls { struct dentry *ls_debug_rsb_dentry; /* debugfs */ struct dentry *ls_debug_waiters_dentry; /* debugfs */ + struct dentry *ls_debug_locks_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; + struct completion ls_members_done; + int ls_members_result; struct miscdevice ls_device; @@ -472,6 +486,7 @@ struct dlm_ls { struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; spinlock_t ls_recover_lock; + unsigned long ls_recover_begin; /* jiffies timestamp */ uint32_t ls_recover_status; /* DLM_RS_ */ uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; @@ -501,6 +516,7 @@ struct dlm_ls { #define LSFL_RCOM_READY 3 #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 +#define LSFL_TIMEWARN 6 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ @@ -518,6 +534,7 @@ struct dlm_user_args { void __user *castaddr; void __user *bastparam; void __user *bastaddr; + uint64_t xid; }; #define DLM_PROC_FLAGS_CLOSING 1 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d8d6e729f96..b455919c199 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); static int send_remove(struct dlm_rsb *r); static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms); static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); +static void del_timeout(struct dlm_lkb *lkb); +void dlm_timeout_warn(struct dlm_lkb *lkb); /* * Lock compatibilty matrix - thanks Steve @@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r) /* Threads cannot use the lockspace while it's being recovered */ -static inline void lock_recovery(struct dlm_ls *ls) +static inline void dlm_lock_recovery(struct dlm_ls *ls) { down_read(&ls->ls_in_recovery); } -static inline void unlock_recovery(struct dlm_ls *ls) +void dlm_unlock_recovery(struct dlm_ls *ls) { up_read(&ls->ls_in_recovery); } -static inline int lock_recovery_try(struct dlm_ls *ls) +int dlm_lock_recovery_try(struct dlm_ls *ls) { return down_read_trylock(&ls->ls_in_recovery); } @@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) if (is_master_copy(lkb)) return; + del_timeout(lkb); + DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); + /* if the operation was a cancel, then return -DLM_ECANCEL, if a + timeout caused the cancel then return -ETIMEDOUT */ + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL; + rv = -ETIMEDOUT; + } + + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL; + rv = -EDEADLK; + } + lkb->lkb_lksb->sb_status = rv; lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; @@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) kref_init(&lkb->lkb_ref); INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); + INIT_LIST_HEAD(&lkb->lkb_time_list); get_random_bytes(&bucket, sizeof(bucket)); bucket &= (ls->ls_lkbtbl_size - 1); @@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls) { int i; - if (dlm_locking_stopped(ls)) - return; - for (i = 0; i < ls->ls_rsbtbl_size; i++) { shrink_bucket(ls, i); + if (dlm_locking_stopped(ls)) + break; cond_resched(); } } +static void add_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + if (is_master_copy(lkb)) { + lkb->lkb_timestamp = jiffies; + return; + } + + if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && + !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { + lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; + goto add_it; + } + if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) + goto add_it; + return; + + add_it: + DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb);); + mutex_lock(&ls->ls_timeout_mutex); + hold_lkb(lkb); + lkb->lkb_timestamp = jiffies; + list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout); + mutex_unlock(&ls->ls_timeout_mutex); +} + +static void del_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + mutex_lock(&ls->ls_timeout_mutex); + if (!list_empty(&lkb->lkb_time_list)) { + list_del_init(&lkb->lkb_time_list); + unhold_lkb(lkb); + } + mutex_unlock(&ls->ls_timeout_mutex); +} + +/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and + lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex + and then lock rsb because of lock ordering in add_timeout. We may need + to specify some special timeout-related bits in the lkb that are just to + be accessed under the timeout_mutex. */ + +void dlm_scan_timeout(struct dlm_ls *ls) +{ + struct dlm_rsb *r; + struct dlm_lkb *lkb; + int do_cancel, do_warn; + + for (;;) { + if (dlm_locking_stopped(ls)) + break; + + do_cancel = 0; + do_warn = 0; + mutex_lock(&ls->ls_timeout_mutex); + list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) { + + if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) && + time_after_eq(jiffies, lkb->lkb_timestamp + + lkb->lkb_timeout_cs * HZ/100)) + do_cancel = 1; + + if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) && + time_after_eq(jiffies, lkb->lkb_timestamp + + dlm_config.ci_timewarn_cs * HZ/100)) + do_warn = 1; + + if (!do_cancel && !do_warn) + continue; + hold_lkb(lkb); + break; + } + mutex_unlock(&ls->ls_timeout_mutex); + + if (!do_cancel && !do_warn) + break; + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + if (do_warn) { + /* clear flag so we only warn once */ + lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN; + if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT)) + del_timeout(lkb); + dlm_timeout_warn(lkb); + } + + if (do_cancel) { + log_debug(ls, "timeout cancel %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN; + lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL; + del_timeout(lkb); + _cancel_lock(r, lkb); + } + + unlock_rsb(r); + unhold_rsb(r); + dlm_put_lkb(lkb); + } +} + +/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping + dlm_recoverd before checking/setting ls_recover_begin. */ + +void dlm_adjust_timeouts(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb; + long adj = jiffies - ls->ls_recover_begin; + + ls->ls_recover_begin = 0; + mutex_lock(&ls->ls_timeout_mutex); + list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) + lkb->lkb_timestamp += adj; + mutex_unlock(&ls->ls_timeout_mutex); +} + /* lkb is master or local copy */ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) * queue for one resource. The granted mode of each lock blocks the requested * mode of the other lock." * - * Part 2: if the granted mode of lkb is preventing the first lkb in the - * convert queue from being granted, then demote lkb (set grmode to NL). - * This second form requires that we check for conv-deadlk even when - * now == 0 in _can_be_granted(). + * Part 2: if the granted mode of lkb is preventing an earlier lkb in the + * convert queue from being granted, then deadlk/demote lkb. * * Example: * Granted Queue: empty @@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) * * The first lock can't be granted because of the granted mode of the second * lock and the second lock can't be granted because it's not first in the - * list. We demote the granted mode of the second lock (the lkb passed to this - * function). + * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we + * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK + * flag set and return DEMOTED in the lksb flags. + * + * Originally, this function detected conv-deadlk in a more limited scope: + * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or + * - if lkb1 was the first entry in the queue (not just earlier), and was + * blocked by the granted mode of lkb2, and there was nothing on the + * granted queue preventing lkb1 from being granted immediately, i.e. + * lkb2 was the only thing preventing lkb1 from being granted. + * + * That second condition meant we'd only say there was conv-deadlk if + * resolving it (by demotion) would lead to the first lock on the convert + * queue being granted right away. It allowed conversion deadlocks to exist + * between locks on the convert queue while they couldn't be granted anyway. * - * After the resolution, the "grant pending" function needs to go back and try - * to grant locks on the convert queue again since the first lock can now be - * granted. + * Now, we detect and take action on conversion deadlocks immediately when + * they're created, even if they may not be immediately consequential. If + * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted + * mode that would prevent lkb1's conversion from being granted, we do a + * deadlk/demote on lkb2 right away and don't let it onto the convert queue. + * I think this means that the lkb_is_ahead condition below should always + * be zero, i.e. there will never be conv-deadlk between two locks that are + * both already on the convert queue. */ -static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) +static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2) { - struct dlm_lkb *this, *first = NULL, *self = NULL; + struct dlm_lkb *lkb1; + int lkb_is_ahead = 0; - list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { - if (!first) - first = this; - if (this == lkb) { - self = lkb; + list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) { + if (lkb1 == lkb2) { + lkb_is_ahead = 1; continue; } - if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) - return 1; - } - - /* if lkb is on the convert queue and is preventing the first - from being granted, then there's deadlock and we demote lkb. - multiple converting locks may need to do this before the first - converting lock can be granted. */ - - if (self && self != first) { - if (!modes_compat(lkb, first) && - !queue_conflict(&rsb->res_grantqueue, first)) - return 1; + if (!lkb_is_ahead) { + if (!modes_compat(lkb2, lkb1)) + return 1; + } else { + if (!modes_compat(lkb2, lkb1) && + !modes_compat(lkb1, lkb2)) + return 1; + } } - return 0; } @@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (!now && !conv && list_empty(&r->res_convertqueue) && first_in_list(lkb, &r->res_waitqueue)) return 1; - out: - /* - * The following, enabled by CONVDEADLK, departs from VMS. - */ - - if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) && - conversion_deadlock_detect(r, lkb)) { - lkb->lkb_grmode = DLM_LOCK_NL; - lkb->lkb_sbflags |= DLM_SBF_DEMOTED; - } - return 0; } -/* - * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a - * simple way to provide a big optimization to applications that can use them. - */ - -static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, + int *err) { - uint32_t flags = lkb->lkb_exflags; int rv; int8_t alt = 0, rqmode = lkb->lkb_rqmode; + int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV); + + if (err) + *err = 0; rv = _can_be_granted(r, lkb, now); if (rv) goto out; - if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) + /* + * The CONVDEADLK flag is non-standard and tells the dlm to resolve + * conversion deadlocks by demoting grmode to NL, otherwise the dlm + * cancels one of the locks. + */ + + if (is_convert && can_be_queued(lkb) && + conversion_deadlock_detect(r, lkb)) { + if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) { + lkb->lkb_grmode = DLM_LOCK_NL; + lkb->lkb_sbflags |= DLM_SBF_DEMOTED; + } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { + if (err) + *err = -EDEADLK; + else { + log_print("can_be_granted deadlock %x now %d", + lkb->lkb_id, now); + dlm_dump_rsb(r); + } + } goto out; + } - if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) + /* + * The ALTPR and ALTCW flags are non-standard and tell the dlm to try + * to grant a request in a mode other than the normal rqmode. It's a + * simple way to provide a big optimization to applications that can + * use them. + */ + + if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR)) alt = DLM_LOCK_PR; - else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) + else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW)) alt = DLM_LOCK_CW; if (alt) { @@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) return rv; } +/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock + for locks pending on the convert list. Once verified (watch for these + log_prints), we should be able to just call _can_be_granted() and not + bother with the demote/deadlk cases here (and there's no easy way to deal + with a deadlk here, we'd have to generate something like grant_lock with + the deadlk error.) */ + +/* returns the highest requested mode of all blocked conversions */ + static int grant_pending_convert(struct dlm_rsb *r, int high) { struct dlm_lkb *lkb, *s; int hi, demoted, quit, grant_restart, demote_restart; + int deadlk; quit = 0; restart: @@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high) list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { demoted = is_demoted(lkb); - if (can_be_granted(r, lkb, 0)) { + deadlk = 0; + + if (can_be_granted(r, lkb, 0, &deadlk)) { grant_lock_pending(r, lkb); grant_restart = 1; - } else { - hi = max_t(int, lkb->lkb_rqmode, hi); - if (!demoted && is_demoted(lkb)) - demote_restart = 1; + continue; } + + if (!demoted && is_demoted(lkb)) { + log_print("WARN: pending demoted %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + demote_restart = 1; + continue; + } + + if (deadlk) { + log_print("WARN: pending deadlock %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + dlm_dump_rsb(r); + continue; + } + + hi = max_t(int, lkb->lkb_rqmode, hi); } if (grant_restart) @@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high) struct dlm_lkb *lkb, *s; list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { - if (can_be_granted(r, lkb, 0)) + if (can_be_granted(r, lkb, 0, NULL)) grant_lock_pending(r, lkb); else high = max_t(int, lkb->lkb_rqmode, high); @@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error) } static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, - int namelen, uint32_t parent_lkid, void *ast, + int namelen, unsigned long timeout_cs, void *ast, void *astarg, void *bast, struct dlm_args *args) { int rv = -EINVAL; @@ -1776,10 +1964,6 @@ static |