From e7fd41792fc0ee52a05fcaac87511f118328d147 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 18 Jan 2006 09:30:29 +0000 Subject: [DLM] The core of the DLM for GFS2/CLVM This is the core of the distributed lock manager which is required to use GFS2 as a cluster filesystem. It is also used by CLVM and can be used as a standalone lock manager independantly of either of these two projects. It implements VAX-style locking modes. Signed-off-by: David Teigland Signed-off-by: Steve Whitehouse --- fs/dlm/lock.c | 3610 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3610 insertions(+) create mode 100644 fs/dlm/lock.c (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c new file mode 100644 index 00000000000..81efb361f95 --- /dev/null +++ b/fs/dlm/lock.c @@ -0,0 +1,3610 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* Central locking logic has four stages: + + dlm_lock() + dlm_unlock() + + request_lock(ls, lkb) + convert_lock(ls, lkb) + unlock_lock(ls, lkb) + cancel_lock(ls, lkb) + + _request_lock(r, lkb) + _convert_lock(r, lkb) + _unlock_lock(r, lkb) + _cancel_lock(r, lkb) + + do_request(r, lkb) + do_convert(r, lkb) + do_unlock(r, lkb) + do_cancel(r, lkb) + + Stage 1 (lock, unlock) is mainly about checking input args and + splitting into one of the four main operations: + + dlm_lock = request_lock + dlm_lock+CONVERT = convert_lock + dlm_unlock = unlock_lock + dlm_unlock+CANCEL = cancel_lock + + Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is + provided to the next stage. + + Stage 3, _xxxx_lock(), determines if the operation is local or remote. + When remote, it calls send_xxxx(), when local it calls do_xxxx(). + + Stage 4, do_xxxx(), is the guts of the operation. It manipulates the + given rsb and lkb and queues callbacks. + + For remote operations, send_xxxx() results in the corresponding do_xxxx() + function being executed on the remote node. The connecting send/receive + calls on local (L) and remote (R) nodes: + + L: send_xxxx() -> R: receive_xxxx() + R: do_xxxx() + L: receive_xxxx_reply() <- R: send_xxxx_reply() +*/ + +#include "dlm_internal.h" +#include "memory.h" +#include "lowcomms.h" +#include "requestqueue.h" +#include "util.h" +#include "dir.h" +#include "member.h" +#include "lockspace.h" +#include "ast.h" +#include "lock.h" +#include "rcom.h" +#include "recover.h" +#include "lvb_table.h" +#include "config.h" + +static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); +static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int send_remove(struct dlm_rsb *r); +static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); +static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms); +static int receive_extralen(struct dlm_message *ms); + +/* + * Lock compatibilty matrix - thanks Steve + * UN = Unlocked state. Not really a state, used as a flag + * PD = Padding. Used to make the matrix a nice power of two in size + * Other states are the same as the VMS DLM. + * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same) + */ + +static const int __dlm_compat_matrix[8][8] = { + /* UN NL CR CW PR PW EX PD */ + {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */ + {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */ + {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */ + {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */ + {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */ + {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */ + {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */ + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ +}; + +/* + * This defines the direction of transfer of LVB data. + * Granted mode is the row; requested mode is the column. + * Usage: matrix[grmode+1][rqmode+1] + * 1 = LVB is returned to the caller + * 0 = LVB is written to the resource + * -1 = nothing happens to the LVB + */ + +const int dlm_lvb_operations[8][8] = { + /* UN NL CR CW PR PW EX PD*/ + { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */ + { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */ + { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */ + { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */ + { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */ + { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */ + { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ + { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ +}; +EXPORT_SYMBOL_GPL(dlm_lvb_operations); + +#define modes_compat(gr, rq) \ + __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] + +int dlm_modes_compat(int mode1, int mode2) +{ + return __dlm_compat_matrix[mode1 + 1][mode2 + 1]; +} + +/* + * Compatibility matrix for conversions with QUECVT set. + * Granted mode is the row; requested mode is the column. + * Usage: matrix[grmode+1][rqmode+1] + */ + +static const int __quecvt_compat_matrix[8][8] = { + /* UN NL CR CW PR PW EX PD */ + {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ + {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ + {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ + {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ + {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ + {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ + {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ +}; + +static void dlm_print_lkb(struct dlm_lkb *lkb) +{ + printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" + " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", + lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, + lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, + lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); +} + +void dlm_print_rsb(struct dlm_rsb *r) +{ + printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", + r->res_nodeid, r->res_flags, r->res_first_lkid, + r->res_recover_locks_count, r->res_name); +} + +/* Threads cannot use the lockspace while it's being recovered */ + +static inline void lock_recovery(struct dlm_ls *ls) +{ + down_read(&ls->ls_in_recovery); +} + +static inline void unlock_recovery(struct dlm_ls *ls) +{ + up_read(&ls->ls_in_recovery); +} + +static inline int lock_recovery_try(struct dlm_ls *ls) +{ + return down_read_trylock(&ls->ls_in_recovery); +} + +static inline int can_be_queued(struct dlm_lkb *lkb) +{ + return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE); +} + +static inline int force_blocking_asts(struct dlm_lkb *lkb) +{ + return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST); +} + +static inline int is_demoted(struct dlm_lkb *lkb) +{ + return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); +} + +static inline int is_remote(struct dlm_rsb *r) +{ + DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); + return !!r->res_nodeid; +} + +static inline int is_process_copy(struct dlm_lkb *lkb) +{ + return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY)); +} + +static inline int is_master_copy(struct dlm_lkb *lkb) +{ + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); + return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? TRUE : FALSE; +} + +static inline int middle_conversion(struct dlm_lkb *lkb) +{ + if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) || + (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW)) + return TRUE; + return FALSE; +} + +static inline int down_conversion(struct dlm_lkb *lkb) +{ + return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); +} + +static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + if (is_master_copy(lkb)) + return; + + DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); + + lkb->lkb_lksb->sb_status = rv; + lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; + + dlm_add_ast(lkb, AST_COMP); +} + +static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) +{ + if (is_master_copy(lkb)) + send_bast(r, lkb, rqmode); + else { + lkb->lkb_bastmode = rqmode; + dlm_add_ast(lkb, AST_BAST); + } +} + +/* + * Basic operations on rsb's and lkb's + */ + +static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) +{ + struct dlm_rsb *r; + + r = allocate_rsb(ls, len); + if (!r) + return NULL; + + r->res_ls = ls; + r->res_length = len; + memcpy(r->res_name, name, len); + init_MUTEX(&r->res_sem); + + INIT_LIST_HEAD(&r->res_lookup); + INIT_LIST_HEAD(&r->res_grantqueue); + INIT_LIST_HEAD(&r->res_convertqueue); + INIT_LIST_HEAD(&r->res_waitqueue); + INIT_LIST_HEAD(&r->res_root_list); + INIT_LIST_HEAD(&r->res_recover_list); + + return r; +} + +static int search_rsb_list(struct list_head *head, char *name, int len, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r; + int error = 0; + + list_for_each_entry(r, head, res_hashchain) { + if (len == r->res_length && !memcmp(name, r->res_name, len)) + goto found; + } + return -ENOENT; + + found: + if (r->res_nodeid && (flags & R_MASTER)) + error = -ENOTBLK; + *r_ret = r; + return error; +} + +static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r; + int error; + + error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); + if (!error) { + kref_get(&r->res_ref); + goto out; + } + error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); + if (error) + goto out; + + list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); + + if (dlm_no_directory(ls)) + goto out; + + if (r->res_nodeid == -1) { + rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = 0; + } else if (r->res_nodeid > 0) { + rsb_set_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = 0; + } else { + DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); + DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); + } + out: + *r_ret = r; + return error; +} + +static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + int error; + write_lock(&ls->ls_rsbtbl[b].lock); + error = _search_rsb(ls, name, len, b, flags, r_ret); + write_unlock(&ls->ls_rsbtbl[b].lock); + return error; +} + +/* + * Find rsb in rsbtbl and potentially create/add one + * + * Delaying the release of rsb's has a similar benefit to applications keeping + * NL locks on an rsb, but without the guarantee that the cached master value + * will still be valid when the rsb is reused. Apps aren't always smart enough + * to keep NL locks on an rsb that they may lock again shortly; this can lead + * to excessive master lookups and removals if we don't delay the release. + * + * Searching for an rsb means looking through both the normal list and toss + * list. When found on the toss list the rsb is moved to the normal list with + * ref count of 1; when found on normal list the ref count is incremented. + */ + +static int find_rsb(struct dlm_ls *ls, char *name, int namelen, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r, *tmp; + uint32_t hash, bucket; + int error = 0; + + if (dlm_no_directory(ls)) + flags |= R_CREATE; + + hash = jhash(name, namelen, 0); + bucket = hash & (ls->ls_rsbtbl_size - 1); + + error = search_rsb(ls, name, namelen, bucket, flags, &r); + if (!error) + goto out; + + if (error == -ENOENT && !(flags & R_CREATE)) + goto out; + + /* the rsb was found but wasn't a master copy */ + if (error == -ENOTBLK) + goto out; + + error = -ENOMEM; + r = create_rsb(ls, name, namelen); + if (!r) + goto out; + + r->res_hash = hash; + r->res_bucket = bucket; + r->res_nodeid = -1; + kref_init(&r->res_ref); + + /* With no directory, the master can be set immediately */ + if (dlm_no_directory(ls)) { + int nodeid = dlm_dir_nodeid(r); + if (nodeid == dlm_our_nodeid()) + nodeid = 0; + r->res_nodeid = nodeid; + } + + write_lock(&ls->ls_rsbtbl[bucket].lock); + error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); + if (!error) { + write_unlock(&ls->ls_rsbtbl[bucket].lock); + free_rsb(r); + r = tmp; + goto out; + } + list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); + write_unlock(&ls->ls_rsbtbl[bucket].lock); + error = 0; + out: + *r_ret = r; + return error; +} + +int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, + unsigned int flags, struct dlm_rsb **r_ret) +{ + return find_rsb(ls, name, namelen, flags, r_ret); +} + +/* This is only called to add a reference when the code already holds + a valid reference to the rsb, so there's no need for locking. */ + +static inline void hold_rsb(struct dlm_rsb *r) +{ + kref_get(&r->res_ref); +} + +void dlm_hold_rsb(struct dlm_rsb *r) +{ + hold_rsb(r); +} + +static void toss_rsb(struct kref *kref) +{ + struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); + struct dlm_ls *ls = r->res_ls; + + DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); + kref_init(&r->res_ref); + list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); + r->res_toss_time = jiffies; + if (r->res_lvbptr) { + free_lvb(r->res_lvbptr); + r->res_lvbptr = NULL; + } +} + +/* When all references to the rsb are gone it's transfered to + the tossed list for later disposal. */ + +static void put_rsb(struct dlm_rsb *r) +{ + struct dlm_ls *ls = r->res_ls; + uint32_t bucket = r->res_bucket; + + write_lock(&ls->ls_rsbtbl[bucket].lock); + kref_put(&r->res_ref, toss_rsb); + write_unlock(&ls->ls_rsbtbl[bucket].lock); +} + +void dlm_put_rsb(struct dlm_rsb *r) +{ + put_rsb(r); +} + +/* See comment for unhold_lkb */ + +static void unhold_rsb(struct dlm_rsb *r) +{ + int rv; + rv = kref_put(&r->res_ref, toss_rsb); + DLM_ASSERT(!rv, dlm_print_rsb(r);); +} + +static void kill_rsb(struct kref *kref) +{ + struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); + + /* All work is done after the return from kref_put() so we + can release the write_lock before the remove and free. */ + + DLM_ASSERT(list_empty(&r->res_lookup),); + DLM_ASSERT(list_empty(&r->res_grantqueue),); + DLM_ASSERT(list_empty(&r->res_convertqueue),); + DLM_ASSERT(list_empty(&r->res_waitqueue),); + DLM_ASSERT(list_empty(&r->res_root_list),); + DLM_ASSERT(list_empty(&r->res_recover_list),); +} + +/* Attaching/detaching lkb's from rsb's is for rsb reference counting. + The rsb must exist as long as any lkb's for it do. */ + +static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + hold_rsb(r); + lkb->lkb_resource = r; +} + +static void detach_lkb(struct dlm_lkb *lkb) +{ + if (lkb->lkb_resource) { + put_rsb(lkb->lkb_resource); + lkb->lkb_resource = NULL; + } +} + +static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) +{ + struct dlm_lkb *lkb, *tmp; + uint32_t lkid = 0; + uint16_t bucket; + + lkb = allocate_lkb(ls); + if (!lkb) + return -ENOMEM; + + lkb->lkb_nodeid = -1; + lkb->lkb_grmode = DLM_LOCK_IV; + kref_init(&lkb->lkb_ref); + + get_random_bytes(&bucket, sizeof(bucket)); + bucket &= (ls->ls_lkbtbl_size - 1); + + write_lock(&ls->ls_lkbtbl[bucket].lock); + + /* counter can roll over so we must verify lkid is not in use */ + + while (lkid == 0) { + lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); + + list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, + lkb_idtbl_list) { + if (tmp->lkb_id != lkid) + continue; + lkid = 0; + break; + } + } + + lkb->lkb_id = lkid; + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + + *lkb_ret = lkb; + return 0; +} + +static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) +{ + uint16_t bucket = lkid & 0xFFFF; + struct dlm_lkb *lkb; + + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { + if (lkb->lkb_id == lkid) + return lkb; + } + return NULL; +} + +static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) +{ + struct dlm_lkb *lkb; + uint16_t bucket = lkid & 0xFFFF; + + if (bucket >= ls->ls_lkbtbl_size) + return -EBADSLT; + + read_lock(&ls->ls_lkbtbl[bucket].lock); + lkb = __find_lkb(ls, lkid); + if (lkb) + kref_get(&lkb->lkb_ref); + read_unlock(&ls->ls_lkbtbl[bucket].lock); + + *lkb_ret = lkb; + return lkb ? 0 : -ENOENT; +} + +static void kill_lkb(struct kref *kref) +{ + struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref); + + /* All work is done after the return from kref_put() so we + can release the write_lock before the detach_lkb */ + + DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); +} + +static int put_lkb(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + uint16_t bucket = lkb->lkb_id & 0xFFFF; + + write_lock(&ls->ls_lkbtbl[bucket].lock); + if (kref_put(&lkb->lkb_ref, kill_lkb)) { + list_del(&lkb->lkb_idtbl_list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + + detach_lkb(lkb); + + /* for local/process lkbs, lvbptr points to caller's lksb */ + if (lkb->lkb_lvbptr && is_master_copy(lkb)) + free_lvb(lkb->lkb_lvbptr); + if (lkb->lkb_range) + free_range(lkb->lkb_range); + free_lkb(lkb); + return 1; + } else { + write_unlock(&ls->ls_lkbtbl[bucket].lock); + return 0; + } +} + +int dlm_put_lkb(struct dlm_lkb *lkb) +{ + return put_lkb(lkb); +} + +/* This is only called to add a reference when the code already holds + a valid reference to the lkb, so there's no need for locking. */ + +static inline void hold_lkb(struct dlm_lkb *lkb) +{ + kref_get(&lkb->lkb_ref); +} + +/* This is called when we need to remove a reference and are certain + it's not the last ref. e.g. del_lkb is always called between a + find_lkb/put_lkb and is always the inverse of a previous add_lkb. + put_lkb would work fine, but would involve unnecessary locking */ + +static inline void unhold_lkb(struct dlm_lkb *lkb) +{ + int rv; + rv = kref_put(&lkb->lkb_ref, kill_lkb); + DLM_ASSERT(!rv, dlm_print_lkb(lkb);); +} + +static void lkb_add_ordered(struct list_head *new, struct list_head *head, + int mode) +{ + struct dlm_lkb *lkb = NULL; + + list_for_each_entry(lkb, head, lkb_statequeue) + if (lkb->lkb_rqmode < mode) + break; + + if (!lkb) + list_add_tail(new, head); + else + __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); +} + +/* add/remove lkb to rsb's grant/convert/wait queue */ + +static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status) +{ + kref_get(&lkb->lkb_ref); + + DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); + + lkb->lkb_status = status; + + switch (status) { + case DLM_LKSTS_WAITING: + if (lkb->lkb_exflags & DLM_LKF_HEADQUE) + list_add(&lkb->lkb_statequeue, &r->res_waitqueue); + else + list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); + break; + case DLM_LKSTS_GRANTED: + /* convention says granted locks kept in order of grmode */ + lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, + lkb->lkb_grmode); + break; + case DLM_LKSTS_CONVERT: + if (lkb->lkb_exflags & DLM_LKF_HEADQUE) + list_add(&lkb->lkb_statequeue, &r->res_convertqueue); + else + list_add_tail(&lkb->lkb_statequeue, + &r->res_convertqueue); + break; + default: + DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status);); + } +} + +static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + lkb->lkb_status = 0; + list_del(&lkb->lkb_statequeue); + unhold_lkb(lkb); +} + +static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts) +{ + hold_lkb(lkb); + del_lkb(r, lkb); + add_lkb(r, lkb, sts); + unhold_lkb(lkb); +} + +/* add/remove lkb from global waiters list of lkb's waiting for + a reply from a remote node */ + +static void add_to_waiters(struct dlm_lkb *lkb, int mstype) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + down(&ls->ls_waiters_sem); + if (lkb->lkb_wait_type) { + log_print("add_to_waiters error %d", lkb->lkb_wait_type); + goto out; + } + lkb->lkb_wait_type = mstype; + kref_get(&lkb->lkb_ref); + list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); + out: + up(&ls->ls_waiters_sem); +} + +static int _remove_from_waiters(struct dlm_lkb *lkb) +{ + int error = 0; + + if (!lkb->lkb_wait_type) { + log_print("remove_from_waiters error"); + error = -EINVAL; + goto out; + } + lkb->lkb_wait_type = 0; + list_del(&lkb->lkb_wait_reply); + unhold_lkb(lkb); + out: + return error; +} + +static int remove_from_waiters(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + int error; + + down(&ls->ls_waiters_sem); + error = _remove_from_waiters(lkb); + up(&ls->ls_waiters_sem); + return error; +} + +static void dir_remove(struct dlm_rsb *r) +{ + int to_nodeid; + + if (dlm_no_directory(r->res_ls)) + return; + + to_nodeid = dlm_dir_nodeid(r); + if (to_nodeid != dlm_our_nodeid()) + send_remove(r); + else + dlm_dir_remove_entry(r->res_ls, to_nodeid, + r->res_name, r->res_length); +} + +/* FIXME: shouldn't this be able to exit as soon as one non-due rsb is + found since they are in order of newest to oldest? */ + +static int shrink_bucket(struct dlm_ls *ls, int b) +{ + struct dlm_rsb *r; + int count = 0, found; + + for (;;) { + found = FALSE; + write_lock(&ls->ls_rsbtbl[b].lock); + list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, + res_hashchain) { + if (!time_after_eq(jiffies, r->res_toss_time + + dlm_config.toss_secs * HZ)) + continue; + found = TRUE; + break; + } + + if (!found) { + write_unlock(&ls->ls_rsbtbl[b].lock); + break; + } + + if (kref_put(&r->res_ref, kill_rsb)) { + list_del(&r->res_hashchain); + write_unlock(&ls->ls_rsbtbl[b].lock); + + if (is_master(r)) + dir_remove(r); + free_rsb(r); + count++; + } else { + write_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "tossed rsb in use %s", r->res_name); + } + } + + return count; +} + +void dlm_scan_rsbs(struct dlm_ls *ls) +{ + int i; + + if (dlm_locking_stopped(ls)) + return; + + for (i = 0; i < ls->ls_rsbtbl_size; i++) { + shrink_bucket(ls, i); + cond_resched(); + } +} + +/* lkb is master or local copy */ + +static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int b, len = r->res_ls->ls_lvblen; + + /* b=1 lvb returned to caller + b=0 lvb written to rsb or invalidated + b=-1 do nothing */ + + b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; + + if (b == 1) { + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + return; + + memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len); + lkb->lkb_lvbseq = r->res_lvbseq; + + } else if (b == 0) { + if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { + rsb_set_flag(r, RSB_VALNOTVALID); + return; + } + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + r->res_lvbptr = allocate_lvb(r->res_ls); + + if (!r->res_lvbptr) + return; + + memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len); + r->res_lvbseq++; + lkb->lkb_lvbseq = r->res_lvbseq; + rsb_clear_flag(r, RSB_VALNOTVALID); + } + + if (rsb_flag(r, RSB_VALNOTVALID)) + lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID; +} + +static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + if (lkb->lkb_grmode < DLM_LOCK_PW) + return; + + if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { + rsb_set_flag(r, RSB_VALNOTVALID); + return; + } + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + r->res_lvbptr = allocate_lvb(r->res_ls); + + if (!r->res_lvbptr) + return; + + memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); + r->res_lvbseq++; + rsb_clear_flag(r, RSB_VALNOTVALID); +} + +/* lkb is process copy (pc) */ + +static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + int b; + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; + if (b == 1) { + int len = receive_extralen(ms); + memcpy(lkb->lkb_lvbptr, ms->m_extra, len); + lkb->lkb_lvbseq = ms->m_lvbseq; + } +} + +/* Manipulate lkb's on rsb's convert/granted/waiting queues + remove_lock -- used for unlock, removes lkb from granted + revert_lock -- used for cancel, moves lkb from convert to granted + grant_lock -- used for request and convert, adds lkb to granted or + moves lkb from convert or waiting to granted + + Each of these is used for master or local copy lkb's. There is + also a _pc() variation used to make the corresponding change on + a process copy (pc) lkb. */ + +static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + del_lkb(r, lkb); + lkb->lkb_grmode = DLM_LOCK_IV; + /* this unhold undoes the original ref from create_lkb() + so this leads to the lkb being freed */ + unhold_lkb(lkb); +} + +static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + set_lvb_unlock(r, lkb); + _remove_lock(r, lkb); +} + +static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + _remove_lock(r, lkb); +} + +static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + lkb->lkb_rqmode = DLM_LOCK_IV; + + switch (lkb->lkb_status) { + case DLM_LKSTS_CONVERT: + move_lkb(r, lkb, DLM_LKSTS_GRANTED); + break; + case DLM_LKSTS_WAITING: + del_lkb(r, lkb); + lkb->lkb_grmode = DLM_LOCK_IV; + /* this unhold undoes the original ref from create_lkb() + so this leads to the lkb being freed */ + unhold_lkb(lkb); + break; + default: + log_print("invalid status for revert %d", lkb->lkb_status); + } +} + +static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + revert_lock(r, lkb); +} + +static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + if (lkb->lkb_grmode != lkb->lkb_rqmode) { + lkb->lkb_grmode = lkb->lkb_rqmode; + if (lkb->lkb_status) + move_lkb(r, lkb, DLM_LKSTS_GRANTED); + else + add_lkb(r, lkb, DLM_LKSTS_GRANTED); + } + + lkb->lkb_rqmode = DLM_LOCK_IV; + + if (lkb->lkb_range) { + lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; + lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; + } +} + +static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + set_lvb_lock(r, lkb); + _grant_lock(r, lkb); + lkb->lkb_highbast = 0; +} + +static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + set_lvb_lock_pc(r, lkb, ms); + _grant_lock(r, lkb); +} + +/* called by grant_pending_locks() which means an async grant message must + be sent to the requesting node in addition to granting the lock if the + lkb belongs to a remote node. */ + +static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + grant_lock(r, lkb); + if (is_master_copy(lkb)) + send_grant(r, lkb); + else + queue_cast(r, lkb, 0); +} + +static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) +{ + struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, + lkb_statequeue); + if (lkb->lkb_id == first->lkb_id) + return TRUE; + + return FALSE; +} + +/* Return 1 if the locks' ranges overlap. If the lkb has no range then it is + assumed to cover 0-ffffffff.ffffffff */ + +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) +{ + if (!lkb1->lkb_range || !lkb2->lkb_range) + return TRUE; + + if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || + lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) + return FALSE; + + return TRUE; +} + +/* Check if the given lkb conflicts with another lkb on the queue. */ + +static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) +{ + struct dlm_lkb *this; + + list_for_each_entry(this, head, lkb_statequeue) { + if (this == lkb) + continue; + if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) + return TRUE; + } + return FALSE; +} + +/* + * "A conversion deadlock arises with a pair of lock requests in the converting + * queue for one resource. The granted mode of each lock blocks the requested + * mode of the other lock." + * + * Part 2: if the granted mode of lkb is preventing the first lkb in the + * convert queue from being granted, then demote lkb (set grmode to NL). + * This second form requires that we check for conv-deadlk even when + * now == 0 in _can_be_granted(). + * + * Example: + * Granted Queue: empty + * Convert Queue: NL->EX (first lock) + * PR->EX (second lock) + * + * The first lock can't be granted because of the granted mode of the second + * lock and the second lock can't be granted because it's not first in the + * list. We demote the granted mode of the second lock (the lkb passed to this + * function). + * + * After the resolution, the "grant pending" function needs to go back and try + * to grant locks on the convert queue again since the first lock can now be + * granted. + */ + +static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) +{ + struct dlm_lkb *this, *first = NULL, *self = NULL; + + list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { + if (!first) + first = this; + if (this == lkb) { + self = lkb; + continue; + } + + if (!ranges_overlap(lkb, this)) + continue; + + if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) + return TRUE; + } + + /* if lkb is on the convert queue and is preventing the first + from being granted, then there's deadlock and we demote lkb. + multiple converting locks may need to do this before the first + converting lock can be granted. */ + + if (self && self != first) { + if (!modes_compat(lkb, first) && + !queue_conflict(&rsb->res_grantqueue, first)) + return TRUE; + } + + return FALSE; +} + +/* + * Return 1 if the lock can be granted, 0 otherwise. + * Also detect and resolve conversion deadlocks. + * + * lkb is the lock to be granted + * + * now is 1 if the function is being called in the context of the + * immediate request, it is 0 if called later, after the lock has been + * queued. + * + * References are from chapter 6 of "VAXcluster Principles" by Roy Davis + */ + +static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +{ + int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); + + /* + * 6-10: Version 5.4 introduced an option to address the phenomenon of + * a new request for a NL mode lock being blocked. + * + * 6-11: If the optional EXPEDITE flag is used with the new NL mode + * request, then it would be granted. In essence, the use of this flag + * tells the Lock Manager to expedite theis request by not considering + * what may be in the CONVERTING or WAITING queues... As of this + * writing, the EXPEDITE flag can be used only with new requests for NL + * mode locks. This flag is not valid for conversion requests. + * + * A shortcut. Earlier checks return an error if EXPEDITE is used in a + * conversion or used with a non-NL requested mode. We also know an + * EXPEDITE request is always granted immediately, so now must always + * be 1. The full condition to grant an expedite request: (now && + * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can + * therefore be shortened to just checking the flag. + */ + + if (lkb->lkb_exflags & DLM_LKF_EXPEDITE) + return TRUE; + + /* + * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be + * added to the remaining conditions. + */ + + if (queue_conflict(&r->res_grantqueue, lkb)) + goto out; + + /* + * 6-3: By default, a conversion request is immediately granted if the + * requested mode is compatible with the modes of all other granted + * locks + */ + + if (queue_conflict(&r->res_convertqueue, lkb)) + goto out; + + /* + * 6-5: But the default algorithm for deciding whether to grant or + * queue conversion requests does not by itself guarantee that such + * requests are serviced on a "first come first serve" basis. This, in + * turn, can lead to a phenomenon known as "indefinate postponement". + * + * 6-7: This issue is dealt with by using the optional QUECVT flag with + * the system service employed to request a lock conversion. This flag + * forces certain conversion requests to be queued, even if they are + * compatible with the granted modes of other locks on the same + * resource. Thus, the use of this flag results in conversion requests + * being ordered on a "first come first servce" basis. + * + * DCT: This condition is all about new conversions being able to occur + * "in place" while the lock remains on the granted queue (assuming + * nothing else conflicts.) IOW if QUECVT isn't set, a conversion + * doesn't _have_ to go onto the convert queue where it's processed in + * order. The "now" variable is necessary to distinguish converts + * being received and processed for the first time now, because once a + * convert is moved to the conversion queue the condition below applies + * requiring fifo granting. + */ + + if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT)) + return TRUE; + + /* + * When using range locks the NOORDER flag is set to avoid the standard + * vms rules on grant order. + */ + + if (lkb->lkb_exflags & DLM_LKF_NOORDER) + return TRUE; + + /* + * 6-3: Once in that queue [CONVERTING], a conversion request cannot be + * granted until all other conversion requests ahead of it are granted + * and/or canceled. + */ + + if (!now && conv && first_in_list(lkb, &r->res_convertqueue)) + return TRUE; + + /* + * 6-4: By default, a new request is immediately granted only if all + * three of the following conditions are satisfied when the request is + * issued: + * - The queue of ungranted conversion requests for the resource is + * empty. + * - The queue of ungranted new requests for the resource is empty. + * - The mode of the new request is compatible with the most + * restrictive mode of all granted locks on the resource. + */ + + if (now && !conv && list_empty(&r->res_convertqueue) && + list_empty(&r->res_waitqueue)) + return TRUE; + + /* + * 6-4: Once a lock request is in the queue of ungranted new requests, + * it cannot be granted until the queue of ungranted conversion + * requests is empty, all ungranted new requests ahead of it are + * granted and/or canceled, and it is compatible with the granted mode + * of the most restrictive lock granted on the resource. + */ + + if (!now && !conv && list_empty(&r->res_convertqueue) && + first_in_list(lkb, &r->res_waitqueue)) + return TRUE; + + out: + /* + * The following, enabled by CONVDEADLK, departs from VMS. + */ + + if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) && + conversion_deadlock_detect(r, lkb)) { + lkb->lkb_grmode = DLM_LOCK_NL; + lkb->lkb_sbflags |= DLM_SBF_DEMOTED; + } + + return FALSE; +} + +/* + * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a + * simple way to provide a big optimization to applications that can use them. + */ + +static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +{ + uint32_t flags = lkb->lkb_exflags; + int rv; + int8_t alt = 0, rqmode = lkb->lkb_rqmode; + + rv = _can_be_granted(r, lkb, now); + if (rv) + goto out; + + if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) + goto out; + + if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) + alt = DLM_LOCK_PR; + else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) + alt = DLM_LOCK_CW; + + if (alt) { + lkb->lkb_rqmode = alt; + rv = _can_be_granted(r, lkb, now); + if (rv) + lkb->lkb_sbflags |= DLM_SBF_ALTMODE; + else + lkb->lkb_rqmode = rqmode; + } + out: + return rv; +} + +static int grant_pending_convert(struct dlm_rsb *r, int high) +{ + struct dlm_lkb *lkb, *s; + int hi, demoted, quit, grant_restart, demote_restart; + + quit = 0; + restart: + grant_restart = 0; + demote_restart = 0; + hi = DLM_LOCK_IV; + + list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { + demoted = is_demoted(lkb); + if (can_be_granted(r, lkb, FALSE)) { + grant_lock_pending(r, lkb); + grant_restart = 1; + } else { + hi = max_t(int, lkb->lkb_rqmode, hi); + if (!demoted && is_demoted(lkb)) + demote_restart = 1; + } + } + + if (grant_restart) + goto restart; + if (demote_restart && !quit) { + quit = 1; + goto restart; + } + + return max_t(int, high, hi); +} + +static int grant_pending_wait(struct dlm_rsb *r, int high) +{ + struct dlm_lkb *lkb, *s; + + list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { + if (can_be_granted(r, lkb, FALSE)) + grant_lock_pending(r, lkb); + else + high = max_t(int, lkb->lkb_rqmode, high); + } + + return high; +} + +static void grant_pending_locks(struct dlm_rsb *r) +{ + struct dlm_lkb *lkb, *s; + int high = DLM_LOCK_IV; + + DLM_ASSERT(is_master(r), dlm_print_rsb(r);); + + high = grant_pending_convert(r, high); + high = grant_pending_wait(r, high); + + if (high == DLM_LOCK_IV) + return; + + /* + * If there are locks left on the wait/convert queue then send blocking + * ASTs to granted locks based on the largest requested mode (high) + * found above. This can generate spurious blocking ASTs for range + * locks. FIXME: highbast < high comparison not valid for PR/CW. + */ + + list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { + if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) && + !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { + queue_bast(r, lkb, high); + lkb->lkb_highbast = high; + } + } +} + +static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, + struct dlm_lkb *lkb) +{ + struct dlm_lkb *gr; + + list_for_each_entry(gr, head, lkb_statequeue) { + if (gr->lkb_bastaddr && + gr->lkb_highbast < lkb->lkb_rqmode && + ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { + queue_bast(r, gr, lkb->lkb_rqmode); + gr->lkb_highbast = lkb->lkb_rqmode; + } + } +} + +static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + send_bast_queue(r, &r->res_grantqueue, lkb); +} + +static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + send_bast_queue(r, &r->res_grantqueue, lkb); + send_bast_queue(r, &r->res_convertqueue, lkb); +} + +/* set_master(r, lkb) -- set the master nodeid of a resource + + The purpose of this function is to set the nodeid field in the given + lkb using the nodeid field in the given rsb. If the rsb's nodeid is + known, it can just be copied to the lkb and the function will return + 0. If the rsb's nodeid is _not_ known, it needs to be looked up + before it can be copied to the lkb. + + When the rsb nodeid is being looked up remotely, the initial lkb + causing the lookup is kept on the ls_waiters list waiting for the + lookup reply. Other lkb's waiting for the same rsb lookup are kept + on the rsb's res_lookup list until the master is verified. + + Return values: + 0: nodeid is set in rsb/lkb and the caller should go ahead and use it + 1: the rsb master is not available and the lkb has been placed on + a wait queue +*/ + +static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = r->res_ls; + int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); + + if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { + rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = lkb->lkb_id; + lkb->lkb_nodeid = r->res_nodeid; + return 0; + } + + if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) { + list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup); + return 1; + } + + if (r->res_nodeid == 0) { + lkb->lkb_nodeid = 0; + return 0; + } + + if (r->res_nodeid > 0) { + lkb->lkb_nodeid = r->res_nodeid; + return 0; + } + + DLM_ASSERT(r->res_nodeid == -1, dlm_print_rsb(r);); + + dir_nodeid = dlm_dir_nodeid(r); + + if (dir_nodeid != our_nodeid) { + r->res_first_lkid = lkb->lkb_id; + send_lookup(r, lkb); + return 1; + } + + for (;;) { + /* It's possible for dlm_scand to remove an old rsb for + this same resource from the toss list, us to create + a new one, look up the master locally, and find it + already exists just before dlm_scand does the + dir_remove() on the previous rsb. */ + + error = dlm_dir_lookup(ls, our_nodeid, r->res_name, + r->res_length, &ret_nodeid); + if (!error) + break; + log_debug(ls, "dir_lookup error %d %s", error, r->res_name); + schedule(); + } + + if (ret_nodeid == our_nodeid) { + r->res_first_lkid = 0; + r->res_nodeid = 0; + lkb->lkb_nodeid = 0; + } else { + r->res_first_lkid = lkb->lkb_id; + r->res_nodeid = ret_nodeid; + lkb->lkb_nodeid = ret_nodeid; + } + return 0; +} + +static void process_lookup_list(struct dlm_rsb *r) +{ + struct dlm_lkb *lkb, *safe; + + list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { + list_del(&lkb->lkb_rsb_lookup); + _request_lock(r, lkb); + schedule(); + } +} + +/* confirm_master -- confirm (or deny) an rsb's master nodeid */ + +static void confirm_master(struct dlm_rsb *r, int error) +{ + struct dlm_lkb *lkb; + + if (!r->res_first_lkid) + return; + + switch (error) { + case 0: + case -EINPROGRESS: + r->res_first_lkid = 0; + process_lookup_list(r); + break; + + case -EAGAIN: + /* the remote master didn't queue our NOQUEUE request; + make a waiting lkb the first_lkid */ + + r->res_first_lkid = 0; + + if (!list_empty(&r->res_lookup)) { + lkb = list_entry(r->res_lookup.next, struct dlm_lkb, + lkb_rsb_lookup); + list_del(&lkb->lkb_rsb_lookup); + r->res_first_lkid = lkb->lkb_id; + _request_lock(r, lkb); + } else + r->res_nodeid = -1; + break; + + default: + log_error(r->res_ls, "confirm_master unknown error %d", error); + } +} + +static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, + int namelen, uint32_t parent_lkid, void *ast, + void *astarg, void *bast, struct dlm_range *range, + struct dlm_args *args) +{ + int rv = -EINVAL; + + /* check for invalid arg usage */ + + if (mode < 0 || mode > DLM_LOCK_EX) + goto out; + + if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN)) + goto out; + + if (flags & DLM_LKF_CANCEL) + goto out; + + if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) + goto out; + + if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT)) + goto out; + + if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) + goto out; + + if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL) + goto out; + + if (!ast || !lksb) + goto out; + + if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) + goto out; + + /* parent/child locks not yet supported */ + if (parent_lkid) + goto out; + + if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) + goto out; + + /* these args will be copied to the lkb in validate_lock_args, + it cannot be done now because when converting locks, fields in + an active lkb cannot be modified before locking the rsb */ + + args->flags = flags; + args->astaddr = ast; + args->astparam = (long) astarg; + args->bastaddr = bast; + args->mode = mode; + args->lksb = lksb; + args->range = range; + rv = 0; + out: + return rv; +} + +static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) +{ + if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK | + DLM_LKF_FORCEUNLOCK)) + return -EINVAL; + + args->flags = flags; + args->astparam = (long) astarg; + return 0; +} + +static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + int rv = -EINVAL; + + if (args->flags & DLM_LKF_CONVERT) { + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_QUECVT && + !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) + goto out; + + rv = -EBUSY; + if (lkb->lkb_status != DLM_LKSTS_GRANTED) + goto out; + + if (lkb->lkb_wait_type) + goto out; + } + + lkb->lkb_exflags = args->flags; + lkb->lkb_sbflags = 0; + lkb->lkb_astaddr = args->astaddr; + lkb->lkb_astparam = args->astparam; + lkb->lkb_bastaddr = args->bastaddr; + lkb->lkb_rqmode = args->mode; + lkb->lkb_lksb = args->lksb; + lkb->lkb_lvbptr = args->lksb->sb_lvbptr; + lkb->lkb_ownpid = (int) current->pid; + + rv = 0; + if (!args->range) + goto out; + + if (!lkb->lkb_range) { + rv = -ENOMEM; + lkb->lkb_range = allocate_range(ls); + if (!lkb->lkb_range) + goto out; + /* This is needed for conversions that contain ranges + where the original lock didn't but it's harmless for + new locks too. */ + lkb->lkb_range[GR_RANGE_START] = 0LL; + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; + } + + lkb->lkb_range[RQ_RANGE_START] = args->range->ra_start; + lkb->lkb_range[RQ_RANGE_END] = args->range->ra_end; + lkb->lkb_flags |= DLM_IFL_RANGE; + rv = 0; + out: + return rv; +} + +static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) +{ + int rv = -EINVAL; + + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_FORCEUNLOCK) + goto out_ok; + + if (args->flags & DLM_LKF_CANCEL && + lkb->lkb_status == DLM_LKSTS_GRANTED) + goto out; + + if (!(args->flags & DLM_LKF_CANCEL) && + lkb->lkb_status != DLM_LKSTS_GRANTED) + goto out; + + rv = -EBUSY; + if (lkb->lkb_wait_type) + goto out; + + out_ok: + lkb->lkb_exflags = args->flags; + lkb->lkb_sbflags = 0; + lkb->lkb_astparam = args->astparam; + + rv = 0; + out: + return rv; +} + +/* + * Four stage 4 varieties: + * do_request(), do_convert(), do_unlock(), do_cancel() + * These are called on the master node for the given lock and + * from the central locking logic. + */ + +static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error = 0; + + if (can_be_granted(r, lkb, TRUE)) { + grant_lock(r, lkb); + queue_cast(r, lkb, 0); + goto out; + } + + if (can_be_queued(lkb)) { + error = -EINPROGRESS; + add_lkb(r, lkb, DLM_LKSTS_WAITING); + send_blocking_asts(r, lkb); + goto out; + } + + error = -EAGAIN; + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + queue_cast(r, lkb, -EAGAIN); + + out: + return error; +} + +static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error = 0; + + /* changing an existing lock may allow others to be granted */ + + if (can_be_granted(r, lkb, TRUE)) { + grant_lock(r, lkb); + queue_cast(r, lkb, 0); + grant_pending_locks(r); + goto out; + } + + if (can_be_queued(lkb)) { + if (is_demoted(lkb)) + grant_pending_locks(r); + error = -EINPROGRESS; + del_lkb(r, lkb); + add_lkb(r, lkb, DLM_LKSTS_CONVERT); + send_blocking_asts(r, lkb); + goto out; + } + + error = -EAGAIN; + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + queue_cast(r, lkb, -EAGAIN); + + out: + return error; +} + +static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + remove_lock(r, lkb); + queue_cast(r, lkb, -DLM_EUNLOCK); + grant_pending_locks(r); + return -DLM_EUNLOCK; +} + +static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + revert_lock(r, lkb); + queue_cast(r, lkb, -DLM_ECANCEL); + grant_pending_locks(r); + return -DLM_ECANCEL; +} + +/* + * Four stage 3 varieties: + * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() + */ + +/* add a new lkb to a possibly new rsb, called by requesting process */ + +static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + /* set_master: sets lkb nodeid from r */ + + error = set_master(r, lkb); + if (error < 0) + goto out; + if (error) { + error = 0; + goto out; + } + + if (is_remote(r)) + /* receive_request() calls do_request() on remote node */ + error = send_request(r, lkb); + else + error = do_request(r, lkb); + out: + return error; +} + +/* change some property of an existing lkb, e.g. mode, range */ + +static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_convert() calls do_convert() on remote node */ + error = send_convert(r, lkb); + else + error = do_convert(r, lkb); + + return error; +} + +/* remove an existing lkb from the granted queue */ + +static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_unlock() calls do_unlock() on remote node */ + error = send_unlock(r, lkb); + else + error = do_unlock(r, lkb); + + return error; +} + +/* remove an existing lkb from the convert or wait queue */ + +static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_cancel() calls do_cancel() on remote node */ + error = send_cancel(r, lkb); + else + error = do_cancel(r, lkb); + + return error; +} + +/* + * Four stage 2 varieties: + * request_lock(), convert_lock(), unlock_lock(), cancel_lock() + */ + +static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, + int len, struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + error = validate_lock_args(ls, lkb, args); + if (error) + goto out; + + error = find_rsb(ls, name, len, R_CREATE, &r); + if (error) + goto out; + + lock_rsb(r); + + attach_lkb(r, lkb); + lkb->lkb_lksb->sb_lkid = lkb->lkb_id; + + error = _request_lock(r, lkb); + + unlock_rsb(r); + put_rsb(r); + + out: + return error; +} + +static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_lock_args(ls, lkb, args); + if (error) + goto out; + + error = _convert_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_unlock_args(lkb, args); + if (error) + goto out; + + error = _unlock_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_unlock_args(lkb, args); + if (error) + goto out; + + error = _cancel_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +/* + * Two stage 1 varieties: dlm_lock() and dlm_unlock() + */ + +int dlm_lock(dlm_lockspace_t *lockspace, + int mode, + struct dlm_lksb *lksb, + uint32_t flags, + void *name, + unsigned int namelen, + uint32_t parent_lkid, + void (*ast) (void *astarg), + void *astarg, + void (*bast) (void *astarg, int mode), + struct dlm_range *range) +{ + struct dlm_ls *ls; + struct dlm_lkb *lkb; + struct dlm_args args; + int error, convert = flags & DLM_LKF_CONVERT; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + lock_recovery(ls); + + if (convert) + error = find_lkb(ls, lksb->sb_lkid, &lkb); + else + error = create_lkb(ls, &lkb); + + if (error) + goto out; + + error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, + astarg, bast, range, &args); + if (error) + goto out_put; + + if (convert) + error = convert_lock(ls, lkb, &args); + else + error = request_lock(ls, lkb, name, namelen, &args); + + if (error == -EINPROGRESS) + error = 0; + out_put: + if (convert || error) + put_lkb(lkb); + if (error == -EAGAIN) + error = 0; + out: + unlock_recovery(ls); + dlm_put_lockspace(ls); + return error; +} + +int dlm_unlock(dlm_lockspace_t *lockspace, + uint32_t lkid, + uint32_t flags, + struct dlm_lksb *lksb, + void *astarg) +{ + struct dlm_ls *ls; + struct dlm_lkb *lkb; + struct dlm_args args; + int error; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + error = set_unlock_args(flags, astarg, &args); + if (error) + goto out_put; + + if (flags & DLM_LKF_CANCEL) + error = cancel_lock(ls, lkb, &args); + else + error = unlock_lock(ls, lkb, &args); + + if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) + error = 0; + out_put: + put_lkb(lkb); + out: + unlock_recovery(ls); + dlm_put_lockspace(ls); + return error; +} + +/* + * send/receive routines for remote operations and replies + * + * send_args + * send_common + * send_request receive_request + * send_convert receive_convert + * send_unlock receive_unlock + * send_cancel receive_cancel + * send_grant receive_grant + * send_bast receive_bast + * send_lookup receive_lookup + * send_remove receive_remove + * + * send_common_reply + * receive_request_reply send_request_reply + * receive_convert_reply send_convert_reply + * receive_unlock_reply send_unlock_reply + * receive_cancel_reply send_cancel_reply + * receive_lookup_reply send_lookup_reply + */ + +static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, + int to_nodeid, int mstype, + struct dlm_message **ms_ret, + struct dlm_mhandle **mh_ret) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + char *mb; + int mb_len = sizeof(struct dlm_message); + + switch (mstype) { + case DLM_MSG_REQUEST: + case DLM_MSG_LOOKUP: + case DLM_MSG_REMOVE: + mb_len += r->res_length; + break; + case DLM_MSG_CONVERT: + case DLM_MSG_UNLOCK: + case DLM_MSG_REQUEST_REPLY: + case DLM_MSG_CONVERT_REPLY: + case DLM_MSG_GRANT: + if (lkb && lkb->lkb_lvbptr) + mb_len += r->res_ls->ls_lvblen; + break; + } + + /* get_buffer gives us a message handle (mh) that we need to + pass into lowcomms_commit and a message buffer (mb) that we + write our data into */ + + mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); + if (!mh) + return -ENOBUFS; + + memset(mb, 0, mb_len); + + ms = (struct dlm_message *) mb; + + ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + ms->m_header.h_lockspace = r->res_ls->ls_global_id; + ms->m_header.h_nodeid = dlm_our_nodeid(); + ms->m_header.h_length = mb_len; + ms->m_header.h_cmd = DLM_MSG; + + ms->m_type = mstype; + + *mh_ret = mh; + *ms_ret = ms; + return 0; +} + +/* further lowcomms enhancements or alternate implementations may make + the return value from this function useful at some point */ + +static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms) +{ + dlm_message_out(ms); + dlm_lowcomms_commit_buffer(mh); + return 0; +} + +static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + ms->m_nodeid = lkb->lkb_nodeid; + ms->m_pid = lkb->lkb_ownpid; + ms->m_lkid = lkb->lkb_id; + ms->m_remid = lkb->lkb_remid; + ms->m_exflags = lkb->lkb_exflags; + ms->m_sbflags = lkb->lkb_sbflags; + ms->m_flags = lkb->lkb_flags; + ms->m_lvbseq = lkb->lkb_lvbseq; + ms->m_status = lkb->lkb_status; + ms->m_grmode = lkb->lkb_grmode; + ms->m_rqmode = lkb->lkb_rqmode; + ms->m_hash = r->res_hash; + + /* m_result and m_bastmode are set from function args, + not from lkb fields */ + + if (lkb->lkb_bastaddr) + ms->m_asts |= AST_BAST; + if (lkb->lkb_astaddr) + ms->m_asts |= AST_COMP; + + if (lkb->lkb_range) { + ms->m_range[0] = lkb->lkb_range[RQ_RANGE_START]; + ms->m_range[1] = lkb->lkb_range[RQ_RANGE_END]; + } + + if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) + memcpy(ms->m_extra, r->res_name, r->res_length); + + else if (lkb->lkb_lvbptr) + memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); + +} + +static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + add_to_waiters(lkb, mstype); + + to_nodeid = r->res_nodeid; + + error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); + if (error) + goto fail; + + send_args(r, lkb, ms); + + error = send_message(mh, ms); + if (error) + goto fail; + return 0; + + fail: + remove_from_waiters(lkb); + return error; +} + +static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_REQUEST); +} + +static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + error = send_common(r, lkb, DLM_MSG_CONVERT); + + /* down conversions go without a reply from the master */ + if (!error && down_conversion(lkb)) { + remove_from_waiters(lkb); + r->res_ls->ls_stub_ms.m_result = 0; + __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); + } + + return error; +} + +/* FIXME: if this lkb is the only lock we hold on the rsb, then set + MASTER_UNCERTAIN to force the next request on the rsb to confirm + that the master is still correct. */ + +static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_UNLOCK); +} + +static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_CANCEL); +} + +static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_result = 0; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_bastmode = mode; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + add_to_waiters(lkb, DLM_MSG_LOOKUP); + + to_nodeid = dlm_dir_nodeid(r); + + error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); + if (error) + goto fail; + + send_args(r, lkb, ms); + + error = send_message(mh, ms); + if (error) + goto fail; + return 0; + + fail: + remove_from_waiters(lkb); + return error; +} + +static int send_remove(struct dlm_rsb *r) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = dlm_dir_nodeid(r); + + error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh); + if (error) + goto out; + + memcpy(ms->m_extra, r->res_name, r->res_length); + ms->m_hash = r->res_hash; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, + int mstype, int rv) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_result = rv; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv); +} + +static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv); +} + +static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv); +} + +static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv); +} + +static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, + int ret_nodeid, int rv) +{ + struct dlm_rsb *r = &ls->ls_stub_rsb; + struct dlm_message *ms; + struct dlm_mhandle *mh; + int error, nodeid = ms_in->m_header.h_nodeid; + + error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh); + if (error) + goto out; + + ms->m_lkid = ms_in->m_lkid; + ms->m_result = rv; + ms->m_nodeid = ret_nodeid; + + error = send_message(mh, ms); + out: + return error; +} + +/* which args we save from a received message depends heavily on the type + of message, unlike the send side where we can safely send everything about + the lkb for any type of message */ + +static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + lkb->lkb_exflags = ms->m_exflags; + lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | + (ms->m_flags & 0x0000FFFF); +} + +static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + lkb->lkb_sbflags = ms->m_sbflags; + lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | + (ms->m_flags & 0x0000FFFF); +} + +static int receive_extralen(struct dlm_message *ms) +{ + return (ms->m_header.h_length - sizeof(struct dlm_message)); +} + +static int receive_range(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (lkb->lkb_flags & DLM_IFL_RANGE) { + if (!lkb->lkb_range) + lkb->lkb_range = allocate_range(ls); + if (!lkb->lkb_range) + return -ENOMEM; + lkb->lkb_range[RQ_RANGE_START] = ms->m_range[0]; + lkb->lkb_range[RQ_RANGE_END] = ms->m_range[1]; + } + return 0; +} + +static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + int len; + + if (lkb->lkb_exflags & DLM_LKF_VALBLK) { + if (!lkb->lkb_lvbptr) + lkb->lkb_lvbptr = allocate_lvb(ls); + if (!lkb->lkb_lvbptr) + return -ENOMEM; + len = receive_extralen(ms); + memcpy(lkb->lkb_lvbptr, ms->m_extra, len); + } + return 0; +} + +static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + lkb->lkb_nodeid = ms->m_header.h_nodeid; + lkb->lkb_ownpid = ms->m_pid; + lkb->lkb_remid = ms->m_lkid; + lkb->lkb_grmode = DLM_LOCK_IV; + lkb->lkb_rqmode = ms->m_rqmode; + lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); + lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); + + DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); + + if (receive_range(ls, lkb, ms)) + return -ENOMEM; + + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + + return 0; +} + +static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (lkb->lkb_nodeid != ms->m_header.h_nodeid) { + log_error(ls, "convert_args nodeid %d %d lkid %x %x", + lkb->lkb_nodeid, ms->m_header.h_nodeid, + lkb->lkb_id, lkb->lkb_remid); + return -EINVAL; + } + + if (!is_master_copy(lkb)) + return -EINVAL; + + if (lkb->lkb_status != DLM_LKSTS_GRANTED) + return -EBUSY; + + if (receive_range(ls, lkb, ms)) + return -ENOMEM; + if (lkb->lkb_range) { + lkb->lkb_range[GR_RANGE_START] = 0LL; + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; + } + + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + + lkb->lkb_rqmode = ms->m_rqmode; + lkb->lkb_lvbseq = ms->m_lvbseq; + + return 0; +} + +static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (!is_master_copy(lkb)) + return -EINVAL; + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + return 0; +} + +/* We fill in the stub-lkb fields with the info that send_xxxx_reply() + uses to send a reply and that the remote end uses to process the reply. */ + +static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb = &ls->ls_stub_lkb; + lkb->lkb_nodeid = ms->m_header.h_nodeid; + lkb->lkb_remid = ms->m_lkid; +} + +static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error, namelen; + + error = create_lkb(ls, &lkb); + if (error) + goto fail; + + receive_flags(lkb, ms); + lkb->lkb_flags |= DLM_IFL_MSTCPY; + error = receive_request_args(ls, lkb, ms); + if (error) { + put_lkb(lkb); + goto fail; + } + + namelen = receive_extralen(ms); + + error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); + if (error) { + put_lkb(lkb); + goto fail; + } + + lock_rsb(r); + + attach_lkb(r, lkb); + error = do_request(r, lkb); + send_req