Diffstat (limited to 'fs/dlm/recover.c')
-rw-r--r--	fs/dlm/recover.c	194
1 file changed, 142 insertions, 52 deletions
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 3c025fe49ad..eaea789bf97 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -36,30 +36,23 @@
  * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
  * function thinks it could have completed the waited-on task, they should wake
  * up ls_wait_general to get an immediate response rather than waiting for the
- * timer to detect the result.  A timer wakes us up periodically while waiting
- * to see if we should abort due to a node failure.  This should only be called
- * by the dlm_recoverd thread.
+ * timeout.  This uses a timeout so it can check periodically if the wait
+ * should abort due to node failure (which doesn't cause a wake_up).
+ * This should only be called by the dlm_recoverd thread.
  */
 
-static void dlm_wait_timer_fn(unsigned long data)
-{
-	struct dlm_ls *ls = (struct dlm_ls *) data;
-	mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
-	wake_up(&ls->ls_wait_general);
-}
-
 int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
 {
 	int error = 0;
+	int rv;
 
-	init_timer(&ls->ls_timer);
-	ls->ls_timer.function = dlm_wait_timer_fn;
-	ls->ls_timer.data = (long) ls;
-	ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
-	add_timer(&ls->ls_timer);
-
-	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
-	del_timer_sync(&ls->ls_timer);
+	while (1) {
+		rv = wait_event_timeout(ls->ls_wait_general,
+					testfn(ls) || dlm_recovery_stopped(ls),
+					dlm_config.ci_recover_timer * HZ);
+		if (rv)
+			break;
+	}
 
 	if (dlm_recovery_stopped(ls)) {
 		log_debug(ls, "dlm_wait_function aborted");
@@ -277,32 +270,100 @@ static void recover_list_del(struct dlm_rsb *r)
 	dlm_put_rsb(r);
 }
 
-static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
+static void recover_list_clear(struct dlm_ls *ls)
 {
-	struct dlm_rsb *r = NULL;
+	struct dlm_rsb *r, *s;
 
 	spin_lock(&ls->ls_recover_list_lock);
+	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
+		list_del_init(&r->res_recover_list);
+		r->res_recover_locks_count = 0;
+		dlm_put_rsb(r);
+		ls->ls_recover_list_count--;
+	}
 
-	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
-		if (id == (unsigned long) r)
-			goto out;
+	if (ls->ls_recover_list_count != 0) {
+		log_error(ls, "warning: recover_list_count %d",
+			  ls->ls_recover_list_count);
+		ls->ls_recover_list_count = 0;
 	}
-	r = NULL;
- out:
 	spin_unlock(&ls->ls_recover_list_lock);
+}
+
+static int recover_idr_empty(struct dlm_ls *ls)
+{
+	int empty = 1;
+
+	spin_lock(&ls->ls_recover_idr_lock);
+	if (ls->ls_recover_list_count)
+		empty = 0;
+	spin_unlock(&ls->ls_recover_idr_lock);
+
+	return empty;
+}
+
+static int recover_idr_add(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+	int rv;
+
+	idr_preload(GFP_NOFS);
+	spin_lock(&ls->ls_recover_idr_lock);
+	if (r->res_id) {
+		rv = -1;
+		goto out_unlock;
+	}
+	rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
+	if (rv < 0)
+		goto out_unlock;
+
+	r->res_id = rv;
+	ls->ls_recover_list_count++;
+	dlm_hold_rsb(r);
+	rv = 0;
+out_unlock:
+	spin_unlock(&ls->ls_recover_idr_lock);
+	idr_preload_end();
+	return rv;
+}
+
+static void recover_idr_del(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+
+	spin_lock(&ls->ls_recover_idr_lock);
+	idr_remove(&ls->ls_recover_idr, r->res_id);
+	r->res_id = 0;
+	ls->ls_recover_list_count--;
+	spin_unlock(&ls->ls_recover_idr_lock);
+
+	dlm_put_rsb(r);
+}
+
+static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
+{
+	struct dlm_rsb *r;
+
+	spin_lock(&ls->ls_recover_idr_lock);
+	r = idr_find(&ls->ls_recover_idr, (int)id);
+	spin_unlock(&ls->ls_recover_idr_lock);
 	return r;
 }
 
-static void recover_list_clear(struct dlm_ls *ls)
+static void recover_idr_clear(struct dlm_ls *ls)
 {
-	struct dlm_rsb *r, *s;
+	struct dlm_rsb *r;
+	int id;
 
-	spin_lock(&ls->ls_recover_list_lock);
-	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
-		list_del_init(&r->res_recover_list);
+	spin_lock(&ls->ls_recover_idr_lock);
+
+	idr_for_each_entry(&ls->ls_recover_idr, r, id) {
+		idr_remove(&ls->ls_recover_idr, id);
+		r->res_id = 0;
 		r->res_recover_locks_count = 0;
-		dlm_put_rsb(r);
 		ls->ls_recover_list_count--;
+
+		dlm_put_rsb(r);
 	}
 
 	if (ls->ls_recover_list_count != 0) {
@@ -310,7 +371,7 @@ static void recover_list_clear(struct dlm_ls *ls)
 			  ls->ls_recover_list_count);
 		ls->ls_recover_list_count = 0;
 	}
-	spin_unlock(&ls->ls_recover_list_lock);
+	spin_unlock(&ls->ls_recover_idr_lock);
 }
@@ -408,7 +469,7 @@ static int recover_master(struct dlm_rsb *r, unsigned int *count)
 		set_new_master(r);
 		error = 0;
 	} else {
-		recover_list_add(r);
+		recover_idr_add(r);
 		error = dlm_send_rcom_lookup(r, dir_nodeid);
 	}
 
@@ -465,7 +526,7 @@ int dlm_recover_masters(struct dlm_ls *ls)
 	int nodir = dlm_no_directory(ls);
 	int error;
 
-	log_debug(ls, "dlm_recover_masters");
+	log_rinfo(ls, "dlm_recover_masters");
 
 	down_read(&ls->ls_root_sem);
 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
@@ -491,12 +552,12 @@ int dlm_recover_masters(struct dlm_ls *ls)
 	}
 	up_read(&ls->ls_root_sem);
 
-	log_debug(ls, "dlm_recover_masters %u of %u", count, total);
+	log_rinfo(ls, "dlm_recover_masters %u of %u", count, total);
 
-	error = dlm_wait_function(ls, &recover_list_empty);
+	error = dlm_wait_function(ls, &recover_idr_empty);
  out:
 	if (error)
-		recover_list_clear(ls);
+		recover_idr_clear(ls);
 
 	return error;
 }
@@ -505,7 +566,7 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
 	struct dlm_rsb *r;
 	int ret_nodeid, new_master;
 
-	r = recover_list_find(ls, rc->rc_id);
+	r = recover_idr_find(ls, rc->rc_id);
 	if (!r) {
 		log_error(ls, "dlm_recover_master_reply no id %llx",
 			  (unsigned long long)rc->rc_id);
@@ -524,9 +585,9 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
 	r->res_nodeid = new_master;
 	set_new_master(r);
 	unlock_rsb(r);
-	recover_list_del(r);
+	recover_idr_del(r);
 
-	if (recover_list_empty(ls))
+	if (recover_idr_empty(ls))
 		wake_up(&ls->ls_wait_general);
  out:
 	return 0;
@@ -624,7 +685,7 @@ int dlm_recover_locks(struct dlm_ls *ls)
 	}
 	up_read(&ls->ls_root_sem);
 
-	log_debug(ls, "dlm_recover_locks %d out", count);
+	log_rinfo(ls, "dlm_recover_locks %d out", count);
 
 	error = dlm_wait_function(ls, &recover_list_empty);
  out:
@@ -652,8 +713,14 @@ void dlm_recovered_lock(struct dlm_rsb *r)
  * the VALNOTVALID flag if necessary, and determining the correct lvb contents
  * based on the lvb's of the locks held on the rsb.
  *
- * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
- * was already set prior to recovery, it's not cleared, regardless of locks.
+ * RSB_VALNOTVALID is set in two cases:
+ *
+ * 1. we are master, but not new, and we purged an EX/PW lock held by a
+ * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL)
+ *
+ * 2. we are a new master, and there are only NL/CR locks left.
+ * (We could probably improve this by only invalidating in this way when
+ * the previous master left uncleanly.  VMS docs mention that.)
  *
 * The LVB contents are only considered for changing when this is a new master
 * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
@@ -669,6 +736,19 @@ static void recover_lvb(struct dlm_rsb *r)
 	int big_lock_exists = 0;
 	int lvblen = r->res_ls->ls_lvblen;
 
+	if (!rsb_flag(r, RSB_NEW_MASTER2) &&
+	    rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
+		/* case 1 above */
+		rsb_set_flag(r, RSB_VALNOTVALID);
+		return;
+	}
+
+	if (!rsb_flag(r, RSB_NEW_MASTER2))
+		return;
+
+	/* we are the new master, so figure out if VALNOTVALID should
+	   be set, and set the rsb lvb from the best lkb available. */
+
 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
 			continue;
@@ -707,13 +787,10 @@ static void recover_lvb(struct dlm_rsb *r)
 	if (!lock_lvb_exists)
 		goto out;
 
+	/* lvb is invalidated if only NL/CR locks remain */
 	if (!big_lock_exists)
 		rsb_set_flag(r, RSB_VALNOTVALID);
 
-	/* don't mess with the lvb unless we're the new master */
-	if (!rsb_flag(r, RSB_NEW_MASTER2))
-		goto out;
-
 	if (!r->res_lvbptr) {
 		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
 		if (!r->res_lvbptr)
@@ -739,6 +816,7 @@ static void recover_lvb(struct dlm_rsb *r)
 
 static void recover_conversion(struct dlm_rsb *r)
 {
+	struct dlm_ls *ls = r->res_ls;
 	struct dlm_lkb *lkb;
 	int grmode = -1;
 
@@ -753,10 +831,15 @@ static void recover_conversion(struct dlm_rsb *r)
 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
 		if (lkb->lkb_grmode != DLM_LOCK_IV)
 			continue;
-		if (grmode == -1)
+		if (grmode == -1) {
+			log_debug(ls, "recover_conversion %x set gr to rq %d",
+				  lkb->lkb_id, lkb->lkb_rqmode);
 			lkb->lkb_grmode = lkb->lkb_rqmode;
-		else
+		} else {
+			log_debug(ls, "recover_conversion %x set gr %d",
+				  lkb->lkb_id, grmode);
 			lkb->lkb_grmode = grmode;
+		}
 	}
 }
 
@@ -781,19 +864,26 @@ void dlm_recover_rsbs(struct dlm_ls *ls)
 		if (is_master(r)) {
 			if (rsb_flag(r, RSB_RECOVER_CONVERT))
 				recover_conversion(r);
+
+			/* recover lvb before granting locks so the updated
+			   lvb/VALNOTVALID is presented in the completion */
+			recover_lvb(r);
+
 			if (rsb_flag(r, RSB_NEW_MASTER2))
 				recover_grant(r);
-			recover_lvb(r);
 			count++;
+		} else {
+			rsb_clear_flag(r, RSB_VALNOTVALID);
 		}
 		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
+		rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
 		rsb_clear_flag(r, RSB_NEW_MASTER2);
 		unlock_rsb(r);
 	}
 	up_read(&ls->ls_root_sem);
 
 	if (count)
-		log_debug(ls, "dlm_recover_rsbs %d done", count);
+		log_rinfo(ls, "dlm_recover_rsbs %d done", count);
 }
 
 /* Create a single list of all root rsb's to be used during recovery */
@@ -860,6 +950,6 @@ void dlm_clear_toss(struct dlm_ls *ls)
 	}
 
 	if (count)
-		log_debug(ls, "dlm_clear_toss %u done", count);
+		log_rinfo(ls, "dlm_clear_toss %u done", count);
 }
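A few patterns in this patch are worth spelling out. First, the rewritten dlm_wait_function(): wait_event_timeout() returns 0 only when the interval expires with the condition still false, so looping on a zero return periodically re-tests dlm_recovery_stopped(), an abort condition that is raised on node failure without any wake_up(). A minimal sketch of the same shape outside dlm; the helper name and the done/aborted callbacks are illustrative, not part of the patch:

	#include <linux/wait.h>
	#include <linux/errno.h>

	/* Wait until done() is true, re-checking aborted() every interval;
	 * aborted() has no waker, which is why plain wait_event() won't do. */
	static int wait_with_abort_check(wait_queue_head_t *wq,
					 int (*done)(void *),
					 int (*aborted)(void *),
					 void *arg, unsigned long interval)
	{
		long rv;

		for (;;) {
			rv = wait_event_timeout(*wq, done(arg) || aborted(arg),
						interval);
			if (rv)		/* condition became true */
				break;
			/* rv == 0: timed out; loop to poll aborted() again */
		}
		return aborted(arg) ? -EINTR : 0;
	}

Returning -EINTR on abort mirrors how dlm reports a stopped recovery from this wait.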
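Second, recover_idr_add() uses the stock idr_preload()/idr_alloc() pattern: the preallocation, which may sleep, happens before taking the spinlock; the insertion itself runs with GFP_NOWAIT under it; and ids start at 1 so a res_id of 0 can mean "not in the tree". A self-contained sketch under assumed names (struct tracked, track_idr, track_lock are hypothetical, not dlm's):

	#include <linux/idr.h>
	#include <linux/spinlock.h>
	#include <linux/gfp.h>

	struct tracked {
		int id;			/* 0 means not inserted */
	};

	static DEFINE_IDR(track_idr);
	static DEFINE_SPINLOCK(track_lock);

	static int track_add(struct tracked *t)
	{
		int rv;

		idr_preload(GFP_NOFS);	/* may sleep; done before locking */
		spin_lock(&track_lock);
		/* start ids at 1; end == 0 means no upper bound */
		rv = idr_alloc(&track_idr, t, 1, 0, GFP_NOWAIT);
		if (rv >= 0) {
			t->id = rv;
			rv = 0;
		}
		spin_unlock(&track_lock);
		idr_preload_end();
		return rv;
	}

	static void track_clear(void)
	{
		struct tracked *t;
		int id;

		spin_lock(&track_lock);
		/* removing the current id while iterating is safe here */
		idr_for_each_entry(&track_idr, t, id) {
			idr_remove(&track_idr, id);
			t->id = 0;
		}
		spin_unlock(&track_lock);
	}

recover_idr_clear() drains its idr the same way, which is what lets the patch drop the old pointer-as-id list walk while keeping ls_recover_list_count as a sanity check.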
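Finally, the two RSB_VALNOTVALID cases in the updated recover_lvb() comment can be restated as a single predicate for clarity; the helper below is purely illustrative and does not exist in dlm:

	#include <linux/types.h>

	/* case 1: an established master purged an EX/PW lock from a failed
	 * node (RSB_RECOVER_LVB_INVAL); case 2: a new master finds only
	 * NL/CR locks left, so no surviving holder can vouch for the lvb. */
	static bool lvb_invalid_after_recovery(bool new_master2,
					       bool lvb_inval,
					       bool lock_lvb_exists,
					       bool big_lock_exists)
	{
		if (!new_master2)
			return lvb_inval;			/* case 1 */
		return lock_lvb_exists && !big_lock_exists;	/* case 2 */
	}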
