aboutsummaryrefslogtreecommitdiff
path: root/fs/dlm/recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm/recover.c')
-rw-r--r--fs/dlm/recover.c194
1 files changed, 142 insertions, 52 deletions
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 3c025fe49ad..eaea789bf97 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -36,30 +36,23 @@
* (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another
* function thinks it could have completed the waited-on task, they should wake
* up ls_wait_general to get an immediate response rather than waiting for the
- * timer to detect the result. A timer wakes us up periodically while waiting
- * to see if we should abort due to a node failure. This should only be called
- * by the dlm_recoverd thread.
+ * timeout. This uses a timeout so it can check periodically if the wait
+ * should abort due to node failure (which doesn't cause a wake_up).
+ * This should only be called by the dlm_recoverd thread.
*/
-static void dlm_wait_timer_fn(unsigned long data)
-{
- struct dlm_ls *ls = (struct dlm_ls *) data;
- mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
- wake_up(&ls->ls_wait_general);
-}
-
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
int error = 0;
+ int rv;
- init_timer(&ls->ls_timer);
- ls->ls_timer.function = dlm_wait_timer_fn;
- ls->ls_timer.data = (long) ls;
- ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
- add_timer(&ls->ls_timer);
-
- wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
- del_timer_sync(&ls->ls_timer);
+ while (1) {
+ rv = wait_event_timeout(ls->ls_wait_general,
+ testfn(ls) || dlm_recovery_stopped(ls),
+ dlm_config.ci_recover_timer * HZ);
+ if (rv)
+ break;
+ }
if (dlm_recovery_stopped(ls)) {
log_debug(ls, "dlm_wait_function aborted");
@@ -277,32 +270,100 @@ static void recover_list_del(struct dlm_rsb *r)
dlm_put_rsb(r);
}
-static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
+static void recover_list_clear(struct dlm_ls *ls)
{
- struct dlm_rsb *r = NULL;
+ struct dlm_rsb *r, *s;
spin_lock(&ls->ls_recover_list_lock);
+ list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
+ list_del_init(&r->res_recover_list);
+ r->res_recover_locks_count = 0;
+ dlm_put_rsb(r);
+ ls->ls_recover_list_count--;
+ }
- list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
- if (id == (unsigned long) r)
- goto out;
+ if (ls->ls_recover_list_count != 0) {
+ log_error(ls, "warning: recover_list_count %d",
+ ls->ls_recover_list_count);
+ ls->ls_recover_list_count = 0;
}
- r = NULL;
- out:
spin_unlock(&ls->ls_recover_list_lock);
+}
+
+static int recover_idr_empty(struct dlm_ls *ls)
+{
+ int empty = 1;
+
+ spin_lock(&ls->ls_recover_idr_lock);
+ if (ls->ls_recover_list_count)
+ empty = 0;
+ spin_unlock(&ls->ls_recover_idr_lock);
+
+ return empty;
+}
+
+static int recover_idr_add(struct dlm_rsb *r)
+{
+ struct dlm_ls *ls = r->res_ls;
+ int rv;
+
+ idr_preload(GFP_NOFS);
+ spin_lock(&ls->ls_recover_idr_lock);
+ if (r->res_id) {
+ rv = -1;
+ goto out_unlock;
+ }
+ rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
+ if (rv < 0)
+ goto out_unlock;
+
+ r->res_id = rv;
+ ls->ls_recover_list_count++;
+ dlm_hold_rsb(r);
+ rv = 0;
+out_unlock:
+ spin_unlock(&ls->ls_recover_idr_lock);
+ idr_preload_end();
+ return rv;
+}
+
+static void recover_idr_del(struct dlm_rsb *r)
+{
+ struct dlm_ls *ls = r->res_ls;
+
+ spin_lock(&ls->ls_recover_idr_lock);
+ idr_remove(&ls->ls_recover_idr, r->res_id);
+ r->res_id = 0;
+ ls->ls_recover_list_count--;
+ spin_unlock(&ls->ls_recover_idr_lock);
+
+ dlm_put_rsb(r);
+}
+
+static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
+{
+ struct dlm_rsb *r;
+
+ spin_lock(&ls->ls_recover_idr_lock);
+ r = idr_find(&ls->ls_recover_idr, (int)id);
+ spin_unlock(&ls->ls_recover_idr_lock);
return r;
}
-static void recover_list_clear(struct dlm_ls *ls)
+static void recover_idr_clear(struct dlm_ls *ls)
{
- struct dlm_rsb *r, *s;
+ struct dlm_rsb *r;
+ int id;
- spin_lock(&ls->ls_recover_list_lock);
- list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
- list_del_init(&r->res_recover_list);
+ spin_lock(&ls->ls_recover_idr_lock);
+
+ idr_for_each_entry(&ls->ls_recover_idr, r, id) {
+ idr_remove(&ls->ls_recover_idr, id);
+ r->res_id = 0;
r->res_recover_locks_count = 0;
- dlm_put_rsb(r);
ls->ls_recover_list_count--;
+
+ dlm_put_rsb(r);
}
if (ls->ls_recover_list_count != 0) {
@@ -310,7 +371,7 @@ static void recover_list_clear(struct dlm_ls *ls)
ls->ls_recover_list_count);
ls->ls_recover_list_count = 0;
}
- spin_unlock(&ls->ls_recover_list_lock);
+ spin_unlock(&ls->ls_recover_idr_lock);
}
@@ -408,7 +469,7 @@ static int recover_master(struct dlm_rsb *r, unsigned int *count)
set_new_master(r);
error = 0;
} else {
- recover_list_add(r);
+ recover_idr_add(r);
error = dlm_send_rcom_lookup(r, dir_nodeid);
}
@@ -465,7 +526,7 @@ int dlm_recover_masters(struct dlm_ls *ls)
int nodir = dlm_no_directory(ls);
int error;
- log_debug(ls, "dlm_recover_masters");
+ log_rinfo(ls, "dlm_recover_masters");
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
@@ -491,12 +552,12 @@ int dlm_recover_masters(struct dlm_ls *ls)
}
up_read(&ls->ls_root_sem);
- log_debug(ls, "dlm_recover_masters %u of %u", count, total);
+ log_rinfo(ls, "dlm_recover_masters %u of %u", count, total);
- error = dlm_wait_function(ls, &recover_list_empty);
+ error = dlm_wait_function(ls, &recover_idr_empty);
out:
if (error)
- recover_list_clear(ls);
+ recover_idr_clear(ls);
return error;
}
@@ -505,7 +566,7 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
struct dlm_rsb *r;
int ret_nodeid, new_master;
- r = recover_list_find(ls, rc->rc_id);
+ r = recover_idr_find(ls, rc->rc_id);
if (!r) {
log_error(ls, "dlm_recover_master_reply no id %llx",
(unsigned long long)rc->rc_id);
@@ -524,9 +585,9 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
r->res_nodeid = new_master;
set_new_master(r);
unlock_rsb(r);
- recover_list_del(r);
+ recover_idr_del(r);
- if (recover_list_empty(ls))
+ if (recover_idr_empty(ls))
wake_up(&ls->ls_wait_general);
out:
return 0;
@@ -624,7 +685,7 @@ int dlm_recover_locks(struct dlm_ls *ls)
}
up_read(&ls->ls_root_sem);
- log_debug(ls, "dlm_recover_locks %d out", count);
+ log_rinfo(ls, "dlm_recover_locks %d out", count);
error = dlm_wait_function(ls, &recover_list_empty);
out:
@@ -652,8 +713,14 @@ void dlm_recovered_lock(struct dlm_rsb *r)
* the VALNOTVALID flag if necessary, and determining the correct lvb contents
* based on the lvb's of the locks held on the rsb.
*
- * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it
- * was already set prior to recovery, it's not cleared, regardless of locks.
+ * RSB_VALNOTVALID is set in two cases:
+ *
+ * 1. we are master, but not new, and we purged an EX/PW lock held by a
+ * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL)
+ *
+ * 2. we are a new master, and there are only NL/CR locks left.
+ * (We could probably improve this by only invaliding in this way when
+ * the previous master left uncleanly. VMS docs mention that.)
*
* The LVB contents are only considered for changing when this is a new master
* of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with
@@ -669,6 +736,19 @@ static void recover_lvb(struct dlm_rsb *r)
int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
+ if (!rsb_flag(r, RSB_NEW_MASTER2) &&
+ rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
+ /* case 1 above */
+ rsb_set_flag(r, RSB_VALNOTVALID);
+ return;
+ }
+
+ if (!rsb_flag(r, RSB_NEW_MASTER2))
+ return;
+
+ /* we are the new master, so figure out if VALNOTVALID should
+ be set, and set the rsb lvb from the best lkb available. */
+
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
@@ -707,13 +787,10 @@ static void recover_lvb(struct dlm_rsb *r)
if (!lock_lvb_exists)
goto out;
+ /* lvb is invalidated if only NL/CR locks remain */
if (!big_lock_exists)
rsb_set_flag(r, RSB_VALNOTVALID);
- /* don't mess with the lvb unless we're the new master */
- if (!rsb_flag(r, RSB_NEW_MASTER2))
- goto out;
-
if (!r->res_lvbptr) {
r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
@@ -739,6 +816,7 @@ static void recover_lvb(struct dlm_rsb *r)
static void recover_conversion(struct dlm_rsb *r)
{
+ struct dlm_ls *ls = r->res_ls;
struct dlm_lkb *lkb;
int grmode = -1;
@@ -753,10 +831,15 @@ static void recover_conversion(struct dlm_rsb *r)
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
if (lkb->lkb_grmode != DLM_LOCK_IV)
continue;
- if (grmode == -1)
+ if (grmode == -1) {
+ log_debug(ls, "recover_conversion %x set gr to rq %d",
+ lkb->lkb_id, lkb->lkb_rqmode);
lkb->lkb_grmode = lkb->lkb_rqmode;
- else
+ } else {
+ log_debug(ls, "recover_conversion %x set gr %d",
+ lkb->lkb_id, grmode);
lkb->lkb_grmode = grmode;
+ }
}
}
@@ -781,19 +864,26 @@ void dlm_recover_rsbs(struct dlm_ls *ls)
if (is_master(r)) {
if (rsb_flag(r, RSB_RECOVER_CONVERT))
recover_conversion(r);
+
+ /* recover lvb before granting locks so the updated
+ lvb/VALNOTVALID is presented in the completion */
+ recover_lvb(r);
+
if (rsb_flag(r, RSB_NEW_MASTER2))
recover_grant(r);
- recover_lvb(r);
count++;
+ } else {
+ rsb_clear_flag(r, RSB_VALNOTVALID);
}
rsb_clear_flag(r, RSB_RECOVER_CONVERT);
+ rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
rsb_clear_flag(r, RSB_NEW_MASTER2);
unlock_rsb(r);
}
up_read(&ls->ls_root_sem);
if (count)
- log_debug(ls, "dlm_recover_rsbs %d done", count);
+ log_rinfo(ls, "dlm_recover_rsbs %d done", count);
}
/* Create a single list of all root rsb's to be used during recovery */
@@ -860,6 +950,6 @@ void dlm_clear_toss(struct dlm_ls *ls)
}
if (count)
- log_debug(ls, "dlm_clear_toss %u done", count);
+ log_rinfo(ls, "dlm_clear_toss %u done", count);
}