aboutsummaryrefslogtreecommitdiff
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r--fs/dlm/Kconfig2
-rw-r--r--fs/dlm/Makefile1
-rw-r--r--fs/dlm/config.c39
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/debug_fs.c186
-rw-r--r--fs/dlm/dlm_internal.h17
-rw-r--r--fs/dlm/lock.c470
-rw-r--r--fs/dlm/lock.h13
-rw-r--r--fs/dlm/lockspace.c86
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/main.c11
-rw-r--r--fs/dlm/member.c11
-rw-r--r--fs/dlm/netlink.c153
-rw-r--r--fs/dlm/rcom.c13
-rw-r--r--fs/dlm/recoverd.c4
-rw-r--r--fs/dlm/user.c129
16 files changed, 968 insertions, 191 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e49..54bcc00ec8d 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
config DLM
tristate "Distributed Lock Manager (DLM)"
- depends on IPV6 || IPV6=n
+ depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP
help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f3..d248e60951b 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
+ netlink.o \
lowcomms.o \
rcom.o \
recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd143..2f8e3c81bc1 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
unsigned int cl_scan_secs;
unsigned int cl_log_debug;
unsigned int cl_protocol;
+ unsigned int cl_timewarn_cs;
};
enum {
@@ -103,6 +104,7 @@ enum {
CLUSTER_ATTR_SCAN_SECS,
CLUSTER_ATTR_LOG_DEBUG,
CLUSTER_ATTR_PROTOCOL,
+ CLUSTER_ATTR_TIMEWARN_CS,
};
struct cluster_attribute {
@@ -131,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
return len;
}
-#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
- .attr = { .ca_name = __stringify(_name), \
- .ca_mode = _mode, \
- .ca_owner = THIS_MODULE }, \
- .show = _read, \
- .store = _write, \
-}
-
#define CLUSTER_ATTR(name, check_zero) \
static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
{ \
@@ -162,6 +156,7 @@ CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +169,7 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+ [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
NULL,
};
@@ -429,6 +425,8 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
cl->cl_log_debug = dlm_config.ci_log_debug;
+ cl->cl_protocol = dlm_config.ci_protocol;
+ cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
@@ -609,7 +607,7 @@ static struct clusters clusters_root = {
int dlm_config_init(void)
{
config_group_init(&clusters_root.subsys.su_group);
- init_MUTEX(&clusters_root.subsys.su_sem);
+ mutex_init(&clusters_root.subsys.su_mutex);
return configfs_register_subsystem(&clusters_root.subsys);
}
@@ -748,9 +746,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
static struct space *get_space(char *name)
{
+ struct config_item *i;
+
if (!space_list)
return NULL;
- return to_space(config_group_find_obj(space_list, name));
+
+ mutex_lock(&space_list->cg_subsys->su_mutex);
+ i = config_group_find_item(space_list, name);
+ mutex_unlock(&space_list->cg_subsys->su_mutex);
+
+ return to_space(i);
}
static void put_space(struct space *sp)
@@ -767,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
if (!comm_list)
return NULL;
- down(&clusters_root.subsys.su_sem);
+ mutex_lock(&clusters_root.subsys.su_mutex);
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
cm = to_comm(i);
@@ -776,20 +781,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
if (cm->nodeid != nodeid)
continue;
found = 1;
+ config_item_get(i);
break;
} else {
if (!cm->addr_count ||
memcmp(cm->addr[0], addr, sizeof(*addr)))
continue;
found = 1;
+ config_item_get(i);
break;
}
}
- up(&clusters_root.subsys.su_sem);
+ mutex_unlock(&clusters_root.subsys.su_mutex);
- if (found)
- config_item_get(i);
- else
+ if (!found)
cm = NULL;
return cm;
}
@@ -909,6 +914,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_SCAN_SECS 5
#define DEFAULT_LOG_DEBUG 0
#define DEFAULT_PROTOCOL 0
+#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +926,7 @@ struct dlm_config_info dlm_config = {
.ci_toss_secs = DEFAULT_TOSS_SECS,
.ci_scan_secs = DEFAULT_SCAN_SECS,
.ci_log_debug = DEFAULT_LOG_DEBUG,
- .ci_protocol = DEFAULT_PROTOCOL
+ .ci_protocol = DEFAULT_PROTOCOL,
+ .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5..a3170fe2209 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
int ci_scan_secs;
int ci_log_debug;
int ci_protocol;
+ int ci_timewarn_cs;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e0..12c3bfd5e66 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include "dlm_internal.h"
+#include "lock.h"
#define DLM_DEBUG_BUF_LEN 4096
static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
struct rsb_iter {
int entry;
+ int locks;
+ int header;
struct dlm_ls *ls;
struct list_head *next;
struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
}
}
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
- struct dlm_rsb *res)
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
+ struct dlm_rsb *res)
{
seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
struct dlm_lkb *lkb;
int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
+ lock_rsb(res);
+
seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
for (i = 0; i < res->res_length; i++) {
if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
/* Print the locks attached to this resource */
seq_printf(s, "Granted Queue\n");
list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
- print_lock(s, lkb, res);
+ print_resource_lock(s, lkb, res);
seq_printf(s, "Conversion Queue\n");
list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
- print_lock(s, lkb, res);
+ print_resource_lock(s, lkb, res);
seq_printf(s, "Waiting Queue\n");
list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
- print_lock(s, lkb, res);
+ print_resource_lock(s, lkb, res);
if (list_empty(&res->res_lookup))
goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
seq_printf(s, "\n");
}
out:
+ unlock_rsb(res);
+ return 0;
+}
+
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+ struct dlm_user_args *ua;
+ unsigned int waiting = 0;
+ uint64_t xid = 0;
+
+ if (lkb->lkb_flags & DLM_IFL_USER) {
+ ua = (struct dlm_user_args *) lkb->lkb_astparam;
+ if (ua)
+ xid = ua->xid;
+ }
+
+ if (lkb->lkb_timestamp)
+ waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+ /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+ r_nodeid r_len r_name */
+
+ seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+ lkb->lkb_id,
+ lkb->lkb_nodeid,
+ lkb->lkb_remid,
+ lkb->lkb_ownpid,
+ (unsigned long long)xid,
+ lkb->lkb_exflags,
+ lkb->lkb_flags,
+ lkb->lkb_status,
+ lkb->lkb_grmode,
+ lkb->lkb_rqmode,
+ waiting,
+ r->res_nodeid,
+ r->res_length,
+ r->res_name);
+}
+
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+ struct dlm_lkb *lkb;
+
+ lock_rsb(r);
+
+ list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+ print_lock(s, lkb, r);
+
+ list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+ print_lock(s, lkb, r);
+
+ list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+ print_lock(s, lkb, r);
+
+ unlock_rsb(r);
return 0;
}
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
read_lock(&ls->ls_rsbtbl[i].lock);
if (!list_empty(&ls->ls_rsbtbl[i].list)) {
ri->next = ls->ls_rsbtbl[i].list.next;
+ ri->rsb = list_entry(ri->next, struct dlm_rsb,
+ res_hashchain);
+ dlm_hold_rsb(ri->rsb);
read_unlock(&ls->ls_rsbtbl[i].lock);
break;
}
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
if (ri->entry >= ls->ls_rsbtbl_size)
return 1;
} else {
+ struct dlm_rsb *old = ri->rsb;
i = ri->entry;
read_lock(&ls->ls_rsbtbl[i].lock);
ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
ri->next = NULL;
ri->entry++;
read_unlock(&ls->ls_rsbtbl[i].lock);
+ dlm_put_rsb(old);
goto top;
}
+ ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+ dlm_hold_rsb(ri->rsb);
read_unlock(&ls->ls_rsbtbl[i].lock);
+ dlm_put_rsb(old);
}
- ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
return 0;
}
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
{
struct rsb_iter *ri;
- ri = kmalloc(sizeof *ri, GFP_KERNEL);
+ ri = kzalloc(sizeof *ri, GFP_KERNEL);
if (!ri)
return NULL;
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
{
struct rsb_iter *ri = iter_ptr;
- print_resource(ri->rsb, file);
+ if (ri->locks) {
+ if (ri->header) {
+ seq_printf(file, "id nodeid remid pid xid exflags flags "
+ "sts grmode rqmode time_ms r_nodeid "
+ "r_len r_name\n");
+ ri->header = 0;
+ }
+ print_locks(ri->rsb, file);
+ } else {
+ print_resource(ri->rsb, file);
+ }
return 0;
}
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
};
/*
+ * Dump state in compact per-lock listing
+ */
+
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+ struct rsb_iter *ri;
+
+ ri = kzalloc(sizeof *ri, GFP_KERNEL);
+ if (!ri)
+ return NULL;
+
+ ri->ls = ls;
+ ri->entry = 0;
+ ri->next = NULL;
+ ri->locks = 1;
+
+ if (*pos == 0)
+ ri->header = 1;
+
+ if (rsb_iter_next(ri)) {
+ rsb_iter_free(ri);
+ return NULL;
+ }
+
+ return ri;
+}
+
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+ struct rsb_iter *ri;
+ loff_t n = *pos;
+
+ ri = locks_iter_init(file->private, pos);
+ if (!ri)
+ return NULL;
+
+ while (n--) {
+ if (rsb_iter_next(ri)) {
+ rsb_iter_free(ri);
+ return NULL;
+ }
+ }
+
+ return ri;
+}
+
+static struct seq_operations locks_seq_ops = {
+ .start = locks_seq_start,
+ .next = rsb_seq_next,
+ .stop = rsb_seq_stop,
+ .show = rsb_seq_show,
+};
+
+static int locks_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &locks_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private;
+
+ return 0;
+}
+
+static const struct file_operations locks_fops = {
+ .owner = THIS_MODULE,
+ .open = locks_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
* dump lkb's on the ls_waiters list
*/
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
return -ENOMEM;
}
+ memset(name, 0, sizeof(name));
+ snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+ ls->ls_debug_locks_dentry = debugfs_create_file(name,
+ S_IFREG | S_IRUGO,
+ dlm_root,
+ ls,
+ &locks_fops);
+ if (!ls->ls_debug_locks_dentry) {
+ debugfs_remove(ls->ls_debug_waiters_dentry);
+ debugfs_remove(ls->ls_debug_rsb_dentry);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
debugfs_remove(ls->ls_debug_rsb_dentry);
if (ls->ls_debug_waiters_dentry)
debugfs_remove(ls->ls_debug_waiters_dentry);
+ if (ls->ls_debug_locks_dentry)
+ debugfs_remove(ls->ls_debug_locks_dentry);
}
int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a..74901e981e1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
void *bastaddr;
int mode;
struct dlm_lksb *lksb;
+ unsigned long timeout;
};
@@ -213,6 +214,9 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
+#define DLM_IFL_WATCH_TIMEWARN 0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -243,6 +247,9 @@ struct dlm_lkb {
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_astqueue; /* need ast to be sent */
struct list_head lkb_ownqueue; /* list of locks for a process */
+ struct list_head lkb_time_list;
+ unsigned long lkb_timestamp;
+ unsigned long lkb_timeout_cs;
char *lkb_lvbptr;
struct dlm_lksb *lkb_lksb; /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
+ struct mutex ls_timeout_mutex;
+ struct list_head ls_timeout;
+
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
int ls_low_nodeid;
int ls_total_weight;
int *ls_node_array;
+ gfp_t ls_allocation;
struct dlm_rsb ls_stub_rsb; /* for returning errors */
struct dlm_lkb ls_stub_lkb; /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
struct dentry *ls_debug_rsb_dentry; /* debugfs */
struct dentry *ls_debug_waiters_dentry; /* debugfs */
+ struct dentry *ls_debug_locks_dentry; /* debugfs */
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
+ struct completion ls_members_done;
+ int ls_members_result;
struct miscdevice ls_device;
@@ -472,6 +486,7 @@ struct dlm_ls {
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
spinlock_t ls_recover_lock;
+ unsigned long ls_recover_begin; /* jiffies timestamp */
uint32_t ls_recover_status; /* DLM_RS_ */
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
#define LSFL_RCOM_READY 3
#define LSFL_RCOM_WAIT 4
#define LSFL_UEVENT_WAIT 5
+#define LSFL_TIMEWARN 6
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
void __user *castaddr;
void __user *bastparam;
void __user *bastaddr;
+ uint64_t xid;
};
#define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96..b455919c199 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
/*
* Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
/* Threads cannot use the lockspace while it's being recovered */
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
{
down_read(&ls->ls_in_recovery);
}
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
{
up_read(&ls->ls_in_recovery);
}
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
{
return down_read_trylock(&ls->ls_in_recovery);
}
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
if (is_master_copy(lkb))
return;
+ del_timeout(lkb);
+
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+ /* if the operation was a cancel, then return -DLM_ECANCEL, if a
+ timeout caused the cancel then return -ETIMEDOUT */
+ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+ lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+ rv = -ETIMEDOUT;
+ }
+
+ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+ lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+ rv = -EDEADLK;
+ }
+
lkb->lkb_lksb->sb_status = rv;
lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+ INIT_LIST_HEAD(&lkb->lkb_time_list);
get_random_bytes(&bucket, sizeof(bucket));
bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
{
int i;
- if (dlm_locking_stopped(ls))
- return;
-
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
shrink_bucket(ls, i);
+ if (dlm_locking_stopped(ls))
+ break;
cond_resched();
}
}
+static void add_timeout(struct dlm_lkb *lkb)
+{
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+ if (is_master_copy(lkb)) {
+ lkb->lkb_timestamp = jiffies;
+ return;
+ }
+
+ if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+ !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+ lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+ goto add_it;
+ }
+ if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+ goto add_it;
+ return;
+
+ add_it:
+ DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+ mutex_lock(&ls->ls_timeout_mutex);
+ hold_lkb(lkb);
+ lkb->lkb_timestamp = jiffies;
+ list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+static void del_timeout(struct dlm_lkb *lkb)
+{
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+ mutex_lock(&ls->ls_timeout_mutex);
+ if (!list_empty(&lkb->lkb_time_list)) {
+ list_del_init(&lkb->lkb_time_list);
+ unhold_lkb(lkb);
+ }
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+ lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex
+ and then lock rsb because of lock ordering in add_timeout. We may need
+ to specify some special timeout-related bits in the lkb that are just to
+ be accessed under the timeout_mutex. */
+
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+ struct dlm_rsb *r;
+ struct dlm_lkb *lkb;
+ int do_cancel, do_warn;
+
+ for (;;) {
+ if (dlm_locking_stopped(ls))
+ break;
+
+ do_cancel = 0;
+ do_warn = 0;
+ mutex_lock(&ls->ls_timeout_mutex);
+ list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+
+ if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+ time_after_eq(jiffies, lkb->lkb_timestamp +
+ lkb->lkb_timeout_cs * HZ/100))
+ do_cancel = 1;
+
+ if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ time_after_eq(jiffies, lkb->lkb_timestamp +
+ dlm_config.ci_timewarn_cs * HZ/100))
+ do_warn = 1;
+
+ if (!do_cancel && !do_warn)
+ continue;
+ hold_lkb(lkb);
+ break;
+ }
+ mutex_unlock(&ls->ls_timeout_mutex);
+
+ if (!do_cancel && !do_warn)
+ break;
+
+ r = lkb->lkb_resource;
+ hold_rsb(r);
+ lock_rsb(r);
+
+ if (do_warn) {
+ /* clear flag so we only warn once */
+ lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+ if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+ del_timeout(lkb);
+ dlm_timeout_warn(lkb);
+ }
+
+ if (do_cancel) {
+ log_debug(ls, "timeout cancel %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+ lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+ del_timeout(lkb);
+ _cancel_lock(r, lkb);
+ }
+
+ unlock_rsb(r);
+ unhold_rsb(r);
+ dlm_put_lkb(lkb);
+ }
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+ dlm_recoverd before checking/setting ls_recover_begin. */
+
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+ struct dlm_lkb *lkb;
+ long adj = jiffies - ls->ls_recover_begin;
+
+ ls->ls_recover_begin = 0;
+ mutex_lock(&ls->ls_timeout_mutex);
+ list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+ lkb->lkb_timestamp += adj;
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+
/* lkb is master or local copy */
static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
* queue for one resource. The granted mode of each lock blocks the requested
* mode of the other lock."
*
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
*
* Example:
* Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
*
* The first lock can't be granted because of the granted mode of the second
* lock and the second lock can't be granted because it's not first in the
- * list. We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ * blocked by the granted mode of lkb2, and there was nothing on the
+ * granted queue preventing lkb1 from being granted immediately, i.e.
+ * lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away. It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
*
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential. If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
*/
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
{
- struct dlm_lkb *this, *first = NULL, *self = NULL;
+ struct dlm_lkb *lkb1;
+ int lkb_is_ahead = 0;
- list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
- if (!first)
- first = this;
- if (this == lkb) {
- self = lkb;
+ list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+ if (lkb1 == lkb2) {
+ lkb_is_ahead = 1;
continue;
}
- if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
- return 1;
- }
-
- /* if lkb is on the convert queue and is preventing the first
- from being granted, then there's deadlock and we demote lkb.
- multiple converting locks may need to do this before the first
- converting lock can be granted. */
-
- if (self && self != first) {
- if (!modes_compat(lkb, first) &&
- !queue_conflict(&rsb->res_grantqueue, first))
- return 1;
+ if (!lkb_is_ahead) {
+ if (!modes_compat(lkb2, lkb1))
+ return 1;
+ } else {
+ if (!modes_compat(lkb2, lkb1) &&
+ !modes_compat(lkb1, lkb2))
+ return 1;
+ }
}
-
return 0;
}
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
if (!now && !conv && list_empty(&r->res_convertqueue) &&
first_in_list(lkb, &r->res_waitqueue))
return 1;
-
out:
- /*
- * The following, enabled by CONVDEADLK, departs from VMS.
- */
-
- if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
- conversion_deadlock_detect(r, lkb)) {
- lkb->lkb_grmode = DLM_LOCK_NL;
- lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
- }
-
return 0;
}
-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+ int *err)
{
- uint32_t flags = lkb->lkb_exflags;
int rv;
int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+ int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+ if (err)
+ *err = 0;
rv = _can_be_granted(r, lkb, now);
if (rv)
goto out;
- if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+ /*
+ * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+ * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+ * cancels one of the locks.
+ */
+
+ if (is_convert && can_be_queued(lkb) &&
+ conversion_deadlock_detect(r, lkb)) {
+ if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+ lkb->lkb_grmode = DLM_LOCK_NL;
+ lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+ } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+ if (err)
+ *err = -EDEADLK;
+ else {
+ log_print("can_be_granted deadlock %x now %d",
+ lkb->lkb_id, now);
+ dlm_dump_rsb(r);
+ }
+ }
goto out;
+ }
- if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+ /*
+ * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+ * to grant a request in a mode other than the normal rqmode. It's a
+ * simple way to provide a big optimization to applications that can
+ * use them.
+ */
+
+ if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
alt = DLM_LOCK_PR;
- else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+ else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
alt = DLM_LOCK_CW;
if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
return rv;
}
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+ for locks pending on the convert list. Once verified (watch for these
+ log_prints), we should be able to just call _can_be_granted() and not
+ bother with the demote/deadlk cases here (and there's no easy way to deal
+ with a deadlk here, we'd have to generate something like grant_lock with
+ the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
static int grant_pending_convert(struct dlm_rsb *r, int high)
{
struct dlm_lkb *lkb, *s;
int hi, demoted, quit, grant_restart, demote_restart;
+ int deadlk;
quit = 0;
restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
demoted = is_demoted(lkb);
- if (can_be_granted(r, lkb, 0)) {
+ deadlk = 0;
+
+ if (can_be_granted(r, lkb, 0, &deadlk)) {
grant_lock_pending(r, lkb);
grant_restart = 1;
- } else {
- hi = max_t(int, lkb->lkb_rqmode, hi);
- if (!demoted && is_demoted(lkb))
- demote_restart = 1;
+ continue;
}
+
+ if (!demoted && is_demoted(lkb)) {
+ log_print("WARN: pending demoted %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ demote_restart = 1;
+ continue;
+ }
+
+ if (deadlk) {
+ log_print("WARN: pending deadlock %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ dlm_dump_rsb(r);
+ continue;
+ }
+
+ hi = max_t(int, lkb->lkb_rqmode, hi);
}
if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
struct dlm_lkb *lkb, *s;
list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
- if (can_be_granted(r, lkb, 0))
+ if (can_be_granted(r, lkb, 0, NULL))
grant_lock_pending(r, lkb);
else
high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
- int namelen, uint32_t parent_lkid, void *ast,
+ int namelen, unsigned long timeout_cs, void *ast,
void *astarg, void *bast, struct dlm_args *args)
{
int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static