diff options
Diffstat (limited to 'fs/gfs2/glock.c')
| -rw-r--r-- | fs/gfs2/glock.c | 2814 |
1 files changed, 1357 insertions, 1457 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7175a4d0643..ee4e04fe60f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1,16 +1,17 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/delay.h> #include <linux/sort.h> @@ -18,10 +19,8 @@ #include <linux/kallsyms.h> #include <linux/gfs2_ondisk.h> #include <linux/list.h> -#include <linux/lm_interface.h> #include <linux/wait.h> #include <linux/module.h> -#include <linux/rwsem.h> #include <asm/uaccess.h> #include <linux/seq_file.h> #include <linux/debugfs.h> @@ -29,127 +28,53 @@ #include <linux/freezer.h> #include <linux/workqueue.h> #include <linux/jiffies.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> +#include <linux/bit_spinlock.h> +#include <linux/percpu.h> +#include <linux/list_sort.h> +#include <linux/lockref.h> #include "gfs2.h" #include "incore.h" #include "glock.h" #include "glops.h" #include "inode.h" -#include "lm.h" #include "lops.h" #include "meta_io.h" #include "quota.h" #include "super.h" #include "util.h" - -struct gfs2_gl_hash_bucket { - struct hlist_head hb_list; -}; - -struct glock_iter { - int hash; /* hash bucket index */ - struct gfs2_sbd *sdp; /* incore superblock */ - struct gfs2_glock *gl; /* current glock struct */ - struct seq_file *seq; /* sequence file for debugfs */ - char string[512]; /* scratch space */ +#include "bmap.h" +#define CREATE_TRACE_POINTS +#include "trace_gfs2.h" + +struct gfs2_glock_iter { + int hash; /* hash bucket index */ + unsigned nhash; /* Index within current bucket */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + loff_t last_pos; /* last position */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); -static void gfs2_glock_drop_th(struct gfs2_glock *gl); -static void run_queue(struct gfs2_glock *gl); +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); -static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; -static struct task_struct *scand_process; -static unsigned int scand_secs = 5; static struct workqueue_struct *glock_workqueue; +struct workqueue_struct *gfs2_delete_workqueue; +static LIST_HEAD(lru_list); +static atomic_t lru_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) -static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; +static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; static struct dentry *gfs2_root; -/* - * Despite what you might think, the numbers below are not arbitrary :-) - * They are taken from the ipv4 routing hash code, which is well tested - * and thus should be nearly optimal. Later on we might tweek the numbers - * but for now this should be fine. - * - * The reason for putting the locks in a separate array from the list heads - * is that we can have fewer locks than list heads and save memory. We use - * the same hash function for both, but with a different hash mask. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) - -#ifdef CONFIG_LOCKDEP -# define GL_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define GL_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define GL_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define GL_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define GL_HASH_LOCK_SZ 512 -# else -# define GL_HASH_LOCK_SZ 256 -# endif -#endif - -/* We never want more locks than chains */ -#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ -# undef GL_HASH_LOCK_SZ -# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE -#endif - -static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ]; - -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; -} -#else /* not SMP, so no spinlocks required */ -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return NULL; -} -#endif - -/** - * relaxed_state_ok - is a requested lock compatible with the current lock mode? - * @actual: the current state of the lock - * @requested: the lock state that was requested by the caller - * @flags: the modifier flags passed in by the caller - * - * Returns: 1 if the locks are compatible, 0 otherwise - */ - -static inline int relaxed_state_ok(unsigned int actual, unsigned requested, - int flags) -{ - if (actual == requested) - return 1; - - if (flags & GL_EXACT) - return 0; - - if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) - return 1; - - if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) - return 1; - - return 0; -} - /** * gl_hash() - Turn glock number into hash bucket number * @lock: The glock number @@ -170,25 +95,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, return h; } -/** - * glock_free() - Perform a few checks and then release struct gfs2_glock - * @gl: The glock to release - * - * Also calls lock module to release its internal structure for this glock. - * - */ +static inline void spin_lock_bucket(unsigned int hash) +{ + hlist_bl_lock(&gl_hash_table[hash]); +} -static void glock_free(struct gfs2_glock *gl) +static inline void spin_unlock_bucket(unsigned int hash) { - struct gfs2_sbd *sdp = gl->gl_sbd; - struct inode *aspace = gl->gl_aspace; + hlist_bl_unlock(&gl_hash_table[hash]); +} - gfs2_lm_put_lock(sdp, gl->gl_lock); +static void gfs2_glock_dealloc(struct rcu_head *rcu) +{ + struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); + + if (gl->gl_ops->go_flags & GLOF_ASPACE) { + kmem_cache_free(gfs2_glock_aspace_cachep, gl); + } else { + kfree(gl->gl_lksb.sb_lvbptr); + kmem_cache_free(gfs2_glock_cachep, gl); + } +} - if (aspace) - gfs2_aspace_put(aspace); +void gfs2_glock_free(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; - kmem_cache_free(gfs2_glock_cachep, gl); + call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); } /** @@ -197,9 +132,61 @@ static void glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +static void gfs2_glock_hold(struct gfs2_glock *gl) { - atomic_inc(&gl->gl_ref); + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); + lockref_get(&gl->gl_lockref); +} + +/** + * demote_ok - Check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int demote_ok(const struct gfs2_glock *gl) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + + if (gl->gl_state == LM_ST_UNLOCKED) + return 0; + if (!list_empty(&gl->gl_holders)) + return 0; + if (glops->go_demote_ok) + return glops->go_demote_ok(gl); + return 1; +} + + +void gfs2_glock_add_to_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + + if (!list_empty(&gl->gl_lru)) + list_del_init(&gl->gl_lru); + else + atomic_inc(&lru_count); + + list_add_tail(&gl->gl_lru, &lru_list); + set_bit(GLF_LRU, &gl->gl_flags); + spin_unlock(&lru_lock); +} + +static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + if (!list_empty(&gl->gl_lru)) { + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); + } +} + +static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); } /** @@ -208,27 +195,27 @@ void gfs2_glock_hold(struct gfs2_glock *gl) * */ -int gfs2_glock_put(struct gfs2_glock *gl) +void gfs2_glock_put(struct gfs2_glock *gl) { - int rv = 0; struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = gfs2_glock2aspace(gl); - write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_test(&gl->gl_ref)) { - hlist_del(&gl->gl_list); - write_unlock(gl_lock_addr(gl->gl_hash)); - gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); - gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); - gfs2_assert(sdp, list_empty(&gl->gl_holders)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); - glock_free(gl); - rv = 1; - goto out; - } - write_unlock(gl_lock_addr(gl->gl_hash)); -out: - return rv; + if (lockref_put_or_lock(&gl->gl_lockref)) + return; + + lockref_mark_dead(&gl->gl_lockref); + + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); + spin_unlock(&gl->gl_lockref.lock); + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } /** @@ -244,836 +231,715 @@ static struct gfs2_glock *search_bucket(unsigned int hash, const struct lm_lockname *name) { struct gfs2_glock *gl; - struct hlist_node *h; + struct hlist_bl_node *h; - hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { + hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { if (!lm_name_equal(&gl->gl_name, name)) continue; if (gl->gl_sbd != sdp) continue; - - atomic_inc(&gl->gl_ref); - - return gl; + if (lockref_get_not_dead(&gl->gl_lockref)) + return gl; } return NULL; } /** - * gfs2_glock_find() - Find glock by lock number - * @sdp: The GFS2 superblock - * @name: The lock name + * may_grant - check if its ok to grant a new lock + * @gl: The glock + * @gh: The lock request which we wish to grant * - * Returns: NULL, or the struct gfs2_glock with the requested number + * Returns: true if its ok to grant the lock */ -static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, - const struct lm_lockname *name) +static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { - unsigned int hash = gl_hash(sdp, name); - struct gfs2_glock *gl; - - read_lock(gl_lock_addr(hash)); - gl = search_bucket(hash, sdp, name); - read_unlock(gl_lock_addr(hash)); - - return gl; + const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); + if ((gh->gh_state == LM_ST_EXCLUSIVE || + gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) + return 0; + if (gl->gl_state == gh->gh_state) + return 1; + if (gh->gh_flags & GL_EXACT) + return 0; + if (gl->gl_state == LM_ST_EXCLUSIVE) { + if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) + return 1; + if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) + return 1; + } + if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) + return 1; + return 0; } -static void glock_work_func(struct work_struct *work) +static void gfs2_holder_wake(struct gfs2_holder *gh) { - struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); - - spin_lock(&gl->gl_spin); - if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) - set_bit(GLF_DEMOTE, &gl->gl_flags); - run_queue(gl); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb__after_atomic(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); } /** - * gfs2_glock_get() - Get a glock, or create one if one doesn't exist - * @sdp: The GFS2 superblock - * @number: the lock number - * @glops: The glock_operations to use - * @create: If 0, don't create the glock if it doesn't exist - * @glp: the glock is returned here - * - * This does not lock a glock, just finds/creates structures for one. + * do_error - Something unexpected has happened during a lock request * - * Returns: errno */ -int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, int create, - struct gfs2_glock **glp) +static inline void do_error(struct gfs2_glock *gl, const int ret) { - struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; - struct gfs2_glock *gl, *tmp; - unsigned int hash = gl_hash(sdp, &name); - int error; + struct gfs2_holder *gh, *tmp; - read_lock(gl_lock_addr(hash)); - gl = search_bucket(hash, sdp, &name); - read_unlock(gl_lock_addr(hash)); - - if (gl || !create) { - *glp = gl; - return 0; + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (ret & LM_OUT_ERROR) + gh->gh_error = -EIO; + else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + gh->gh_error = GLR_TRYFAILED; + else + continue; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); } +} - gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); - if (!gl) - return -ENOMEM; +/** + * do_promote - promote as many requests as possible on the current queue + * @gl: The glock + * + * Returns: 1 if there is a blocked holder at the head of the list, or 2 + * if a type specific operation is underway. + */ - gl->gl_flags = 0; - gl->gl_name = name; - atomic_set(&gl->gl_ref, 1); - gl->gl_state = LM_ST_UNLOCKED; - gl->gl_demote_state = LM_ST_EXCLUSIVE; - gl->gl_hash = hash; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; - gl->gl_ops = glops; - gl->gl_req_gh = NULL; - gl->gl_req_bh = NULL; - gl->gl_vn = 0; - gl->gl_stamp = jiffies; - gl->gl_tchange = jiffies; - gl->gl_object = NULL; - gl->gl_sbd = sdp; - gl->gl_aspace = NULL; - INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); +static int do_promote(struct gfs2_glock *gl) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh, *tmp; + int ret; - /* If this glock protects actual on-disk data or metadata blocks, - create a VFS inode to manage the pages/buffers holding them. */ - if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) { - gl->gl_aspace = gfs2_aspace_get(sdp); - if (!gl->gl_aspace) { - error = -ENOMEM; - goto fail; +restart: + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (may_grant(gl, gh)) { + if (gh->gh_list.prev == &gl->gl_holders && + glops->go_lock) { + spin_unlock(&gl->gl_spin); + /* FIXME: eliminate this eventually */ + ret = glops->go_lock(gh); + spin_lock(&gl->gl_spin); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 1); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 0); + gfs2_holder_wake(gh); + continue; } + if (gh->gh_list.prev == &gl->gl_holders) + return 1; + do_error(gl, 0); + break; } - - error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock); - if (error) - goto fail_aspace; - - write_lock(gl_lock_addr(hash)); - tmp = search_bucket(hash, sdp, &name); - if (tmp) { - write_unlock(gl_lock_addr(hash)); - glock_free(gl); - gl = tmp; - } else { - hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); - } - - *glp = gl; - return 0; - -fail_aspace: - if (gl->gl_aspace) - gfs2_aspace_put(gl->gl_aspace); -fail: - kmem_cache_free(gfs2_glock_cachep, gl); - return error; } /** - * gfs2_holder_init - initialize a struct gfs2_holder in the default way + * find_first_waiter - find the first gh that's waiting for the glock * @gl: the glock - * @state: the state we're requesting - * @flags: the modifier flags - * @gh: the holder structure - * */ -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, - struct gfs2_holder *gh) +static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) { - INIT_LIST_HEAD(&gh->gh_list); - gh->gh_gl = gl; - gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner_pid = get_pid(task_pid(current)); - gh->gh_state = state; - gh->gh_flags = flags; - gh->gh_error = 0; - gh->gh_iflags = 0; - gfs2_glock_hold(gl); -} - -/** - * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it - * @state: the state we're requesting - * @flags: the modifier flags - * @gh: the holder structure - * - * Don't mess with the glock. - * - */ + struct gfs2_holder *gh; -void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh) -{ - gh->gh_state = state; - gh->gh_flags = flags; - gh->gh_iflags = 0; - gh->gh_ip = (unsigned long)__builtin_return_address(0); + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; + } + return NULL; } /** - * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) - * @gh: the holder structure + * state_change - record that the glock is now in a different state + * @gl: the glock + * @new_state the new state * */ -void gfs2_holder_uninit(struct gfs2_holder *gh) +static void state_change(struct gfs2_glock *gl, unsigned int new_state) { - put_pid(gh->gh_owner_pid); - gfs2_glock_put(gh->gh_gl); - gh->gh_gl = NULL; - gh->gh_ip = 0; -} + int held1, held2; -static void gfs2_holder_wake(struct gfs2_holder *gh) -{ - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); -} + held1 = (gl->gl_state != LM_ST_UNLOCKED); + held2 = (new_state != LM_ST_UNLOCKED); -static int just_schedule(void *word) -{ - schedule(); - return 0; -} + if (held1 != held2) { + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); + if (held2) + gl->gl_lockref.count++; + else + gl->gl_lockref.count--; + } + if (held1 && held2 && list_empty(&gl->gl_holders)) + clear_bit(GLF_QUEUED, &gl->gl_flags); -static void wait_on_holder(struct gfs2_holder *gh) -{ - might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); + if (new_state != gl->gl_target) + /* shorten our minimum hold time */ + gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, + GL_GLOCK_MIN_HOLD); + gl->gl_state = new_state; + gl->gl_tchange = jiffies; } static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; - clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); - wake_up_bit(&gl->gl_flags, GLF_DEMOTE); -} - -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_atomic(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); } /** - * rq_mutex - process a mutex request in the queue - * @gh: the glock holder + * finish_xmote - The DLM has replied to one of our lock requests + * @gl: The glock + * @ret: The status from the DLM * - * Returns: 1 if the queue is blocked */ -static int rq_mutex(struct gfs2_holder *gh) +static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) { - struct gfs2_glock *gl = gh->gh_gl; - - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. */ - set_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); - - return 1; -} - -/** - * rq_promote - process a promote request in the queue - * @gh: the glock holder - * - * Acquire a new inter-node lock, or change a lock state to more restrictive. - * - * Returns: 1 if the queue is blocked - */ + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh; + unsigned state = ret & LM_OUT_ST_MASK; + int rv; -static int rq_promote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; + spin_lock(&gl->gl_spin); + trace_gfs2_glock_state_change(gl, state); + state_change(gl, state); + gh = find_first_waiter(gl); + + /* Demote to UN request arrived during demote to SH or DF */ + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_target = LM_ST_UNLOCKED; + + /* Check for state != intended state */ + if (unlikely(state != gl->gl_target)) { + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { + /* move to back of queue and try next entry */ + if (ret & LM_OUT_CANCELED) { + if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) + list_move_tail(&gh->gh_list, &gl->gl_holders); + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + goto retry; + } + /* Some error or failed "try lock" - report it */ + if ((ret & LM_OUT_ERROR) || + (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { + gl->gl_target = gl->gl_state; + do_error(gl, ret); + goto out; + } + } + switch(state) { + /* Unlocked due to conversion deadlock, try again */ + case LM_ST_UNLOCKED: +retry: + do_xmote(gl, gh, gl->gl_target); + break; + /* Conversion fails, unlock and try again */ + case LM_ST_SHARED: + case LM_ST_DEFERRED: + do_xmote(gl, gh, LM_ST_UNLOCKED); + break; + default: /* Everything else */ + pr_err("wanted %u got %u\n", gl->gl_target, state); + GLOCK_BUG_ON(gl, 1); + } + spin_unlock(&gl->gl_spin); + return; + } - if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - if (list_empty(&gl->gl_holders)) { - gl->gl_req_gh = gh; - set_bit(GLF_LOCK, &gl->gl_flags); + /* Fast path - we got what we asked for */ + if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gfs2_demote_wake(gl); + if (state != LM_ST_UNLOCKED) { + if (glops->go_xmote_bh) { spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gh->gh_gl, gh); + rv = glops->go_xmote_bh(gl, gh); spin_lock(&gl->gl_spin); + if (rv) { + do_error(gl, rv); + goto out; + } } - return 1; - } - - if (list_empty(&gl->gl_holders)) { - set_bit(HIF_FIRST, &gh->gh_iflags); - set_bit(GLF_LOCK, &gl->gl_flags); - } else { - struct gfs2_holder *next_gh; - if (gh->gh_state == LM_ST_EXCLUSIVE) - return 1; - next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, - gh_list); - if (next_gh->gh_state == LM_ST_EXCLUSIVE) - return 1; + rv = do_promote(gl); + if (rv == 2) + goto out_locked; } - - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - - gfs2_holder_wake(gh); - - return 0; +out: + clear_bit(GLF_LOCK, &gl->gl_flags); +out_locked: + spin_unlock(&gl->gl_spin); } /** - * rq_demote - process a demote request in the queue - * @gh: the glock holder + * do_xmote - Calls the DLM to change the state of a lock + * @gl: The lock state + * @gh: The holder (only for promotes) + * @target: The target lock state * - * Returns: 1 if the queue is blocked */ -static int rq_demote(struct gfs2_glock *gl) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { - if (!list_empty(&gl->gl_holders)) - return 1; + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int lck_flags = gh ? gh->gh_flags : 0; + int ret; - if (gl->gl_state == gl->gl_demote_state || - gl->gl_state == LM_ST_UNLOCKED) { - gfs2_demote_wake(gl); - return 0; + lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | + LM_FLAG_PRIORITY); + GLOCK_BUG_ON(gl, gl->gl_state == target); + GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); + if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && + glops->go_inval) { + set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + do_error(gl, 0); /* Fail queued try locks */ } + gl->gl_req = target; + set_bit(GLF_BLOCKING, &gl->gl_flags); + if ((gl->gl_req == LM_ST_UNLOCKED) || + (gl->gl_state == LM_ST_EXCLUSIVE) || + (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) + clear_bit(GLF_BLOCKING, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + if (glops->go_sync) + glops->go_sync(gl); + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); - set_bit(GLF_LOCK, &gl->gl_flags); - set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - if (gl->gl_demote_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) { - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - } else { - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gl, NULL); + gfs2_glock_hold(gl); + if (sdp->sd_lockstruct.ls_ops->lm_lock) { + /* lock_dlm */ + ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); + if (ret) { + pr_err("lm_lock ret %d\n", ret); + GLOCK_BUG_ON(gl, 1); + } + } else { /* lock_nolock */ + finish_xmote(gl, target); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } spin_lock(&gl->gl_spin); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - return 0; } /** - * run_queue - process holder structures on a glock + * find_first_holder - find the first "holder" gh * @gl: the glock - * */ -static void run_queue(struct gfs2_glock *gl) + +static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) { struct gfs2_holder *gh; - int blocked = 1; - for (;;) { - if (test_bit(GLF_LOCK, &gl->gl_flags)) - break; - - if (!list_empty(&gl->gl_waiters1)) { - gh = list_entry(gl->gl_waiters1.next, - struct gfs2_holder, gh_list); - blocked = rq_mutex(gh); - } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - blocked = rq_demote(gl); - if (gl->gl_waiters2 && !blocked) { - set_bit(GLF_DEMOTE, &gl->gl_flags); - gl->gl_demote_state = LM_ST_UNLOCKED; - } - gl->gl_waiters2 = 0; - } else if (!list_empty(&gl->gl_waiters3)) { - gh = list_entry(gl->gl_waiters3.next, - struct gfs2_holder, gh_list); - blocked = rq_promote(gh); - } else - break; - - if (blocked) - break; + if (!list_empty(&gl->gl_holders)) { + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; } + return NULL; } /** - * gfs2_glmutex_lock - acquire a local lock on a glock - * @gl: the glock + * run_queue - do all outstanding tasks related to a glock + * @gl: The glock in question + * @nonblock: True if we must not block in run_queue * - * Gives caller exclusive access to manipulate a glock structure. */ -static void gfs2_glmutex_lock(struct gfs2_glock *gl) +static void run_queue(struct gfs2_glock *gl, const int nonblock) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { - spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - struct gfs2_holder gh; - - gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_WAIT, &gh.gh_iflags); - list_add_tail(&gh.gh_list, &gl->gl_waiters1); - spin_unlock(&gl->gl_spin); - wait_on_holder(&gh); - gfs2_holder_uninit(&gh); - } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); - spin_unlock(&gl->gl_spin); - } -} - -/** - * gfs2_glmutex_trylock - try to acquire a local lock on a glock - * @gl: the glock - * - * Returns: 1 if the glock is acquired - */ + struct gfs2_holder *gh = NULL; + int ret; -static int gfs2_glmutex_trylock(struct gfs2_glock *gl) -{ - int acquired = 1; + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + return; - spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - acquired = 0; + GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); + + if (test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_demote_state != gl->gl_state) { + if (find_first_holder(gl)) + goto out_unlock; + if (nonblock) + goto out_sched; + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); + gl->gl_target = gl->gl_demote_state; } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + gfs2_demote_wake(gl); + ret = do_promote(gl); + if (ret == 0) + goto out_unlock; + if (ret == 2) + goto out; + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + do_error(gl, 0); /* Fail queued try locks */ } - spin_unlock(&gl->gl_spin); + do_xmote(gl, gh, gl->gl_target); +out: + return; - return acquired; -} +out_sched: + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_atomic(); + gl->gl_lockref.count++; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gl->gl_lockref.count--; + return; -/** - * gfs2_glmutex_unlock - release a local lock on a glock - * @gl: the glock - * - */ +out_unlock: + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_atomic(); + return; +} -static void gfs2_glmutex_unlock(struct gfs2_glock *gl) +static void delete_work_func(struct work_struct *work) { - struct pid *pid; + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip; + struct inode *inode; + u64 no_addr = gl->gl_name.ln_number; - spin_lock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); - pid = gl->gl_owner_pid; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; - run_queue(gl); - spin_unlock(&gl->gl_spin); + ip = gl->gl_object; + /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ - put_pid(pid); + if (ip) + inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1); + else + inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); + if (inode && !IS_ERR(inode)) { + d_prune_aliases(inode); + iput(inode); + } + gfs2_glock_put(gl); } -/** - * handle_callback - process a demote request - * @gl: the glock - * @state: the state the caller wants us to change to - * - * There are only two requests that we are going to see in actual - * practise: LM_ST_SHARED and LM_ST_UNLOCKED - */ - -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - int remote, unsigned long delay) +static void glock_work_func(struct work_struct *work) { - int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; + unsigned long delay = 0; + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + int drop_ref = 0; + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + finish_xmote(gl, gl->gl_reply); + drop_ref = 1; + } spin_lock(&gl->gl_spin); - set_bit(bit, &gl->gl_flags); - if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { - gl->gl_demote_state = state; - gl->gl_demote_time = jiffies; - if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && - gl->gl_object) { - gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); - return; + if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state != LM_ST_EXCLUSIVE) { + unsigned long holdtime, now = jiffies; + + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; + + if (!delay) { + clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); + set_bit(GLF_DEMOTE, &gl->gl_flags); } - } else if (gl->gl_demote_state != LM_ST_UNLOCKED && - gl->gl_demote_state != state) { - if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) - gl->gl_waiters2 = 1; - else - gl->gl_demote_state = LM_ST_UNLOCKED; } + run_queue(gl, 0); spin_unlock(&gl->gl_spin); + if (!delay) + gfs2_glock_put(gl); + else { + if (gl->gl_name.ln_type != LM_TYPE_INODE) + delay = 0; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); + } + if (drop_ref) + gfs2_glock_put(gl); } /** - * state_change - record that the glock is now in a different state - * @gl: the glock - * @new_state the new state + * gfs2_glock_get() - Get a glock, or create one if one doesn't exist + * @sdp: The GFS2 superblock + * @number: the lock number + * @glops: The glock_operations to use + * @create: If 0, don't create the glock if it doesn't exist + * @glp: the glock is returned here + * + * This does not lock a glock, just finds/creates structures for one. * + * Returns: errno */ -static void state_change(struct gfs2_glock *gl, unsigned int new_state) +int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, int create, + struct gfs2_glock **glp) { - int held1, held2; - - held1 = (gl->gl_state != LM_ST_UNLOCKED); - held2 = (new_state != LM_ST_UNLOCKED); - - if (held1 != held2) { - if (held2) - gfs2_glock_hold(gl); - else - gfs2_glock_put(gl); - } + struct super_block *s = sdp->sd_vfs; + struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; + struct gfs2_glock *gl, *tmp; + unsigned int hash = gl_hash(sdp, &name); + struct address_space *mapping; + struct kmem_cache *cachep; - gl->gl_state = new_state; - gl->gl_tchange = jiffies; -} + rcu_read_lock(); + gl = search_bucket(hash, sdp, &name); + rcu_read_unlock(); -/** - * xmote_bh - Called after the lock module is done acquiring a lock - * @gl: The glock in question - * @ret: the int returned from the lock module - * - */ + *glp = gl; + if (gl) + return 0; + if (!create) + return -ENOENT; -static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh = gl->gl_req_gh; - int prev_state = gl->gl_state; - int op_done = 1; - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); - - state_change(gl, ret & LM_OUT_ST_MASK); - - if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) { - if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - } else if (gl->gl_state == LM_ST_DEFERRED) { - /* We might not want to do this here. - Look at moving to the inode glops. */ - if (glops->go_inval) - glops->go_inval(gl, 0); - } + if (glops->go_flags & GLOF_ASPACE) + cachep = gfs2_glock_aspace_cachep; + else + cachep = gfs2_glock_cachep; + gl = kmem_cache_alloc(cachep, GFP_NOFS); + if (!gl) + return -ENOMEM; - /* Deal with each possible exit condition */ + memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); - if (!gh) { - gl->gl_stamp = jiffies; - if (ret & LM_OUT_CANCELED) { - op_done = 0; - } else { - spin_lock(&gl->gl_spin); - if (gl->gl_state != gl->gl_demote_state) { - gl->gl_req_bh = NULL; - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - gfs2_glock_put(gl); - return; - } - gfs2_demote_wake(gl); - spin_unlock(&gl->gl_spin); - } - } else { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - gh->gh_error = -EIO; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - goto out; - gh->gh_error = GLR_CANCELED; - if (ret & LM_OUT_CANCELED) - goto out; - if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - list_add_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - set_bit(HIF_FIRST, &gh->gh_iflags); - op_done = 0; - goto out; + if (glops->go_flags & GLOF_LVB) { + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); + if (!gl->gl_lksb.sb_lvbptr) { + kmem_cache_free(cachep, gl); + return -ENOMEM; } - gh->gh_error = GLR_TRYFAILED; - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) - goto out; - gh->gh_error = -EINVAL; - if (gfs2_assert_withdraw(sdp, 0) == -1) - fs_err(sdp, "ret = 0x%.8X\n", ret); -out: - spin_unlock(&gl->gl_spin); } - if (glops->go_xmote_bh) - glops->go_xmote_bh(gl); + atomic_inc(&sdp->sd_glock_disposal); + gl->gl_sbd = sdp; + gl->gl_flags = 0; + gl->gl_name = name; + gl->gl_lockref.count = 1; + gl->gl_state = LM_ST_UNLOCKED; + gl->gl_target = LM_ST_UNLOCKED; + gl->gl_demote_state = LM_ST_EXCLUSIVE; + gl->gl_hash = hash; + gl->gl_ops = glops; + gl->gl_dstamp = ktime_set(0, 0); + preempt_disable(); + /* We use the global stats to estimate the initial per-glock stats */ + gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type]; + preempt_enable(); + gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; + gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; + gl->gl_tchange = jiffies; + gl->gl_object = NULL; + gl->gl_hold_time = GL_GLOCK_DFT_HOLD; + INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); + INIT_WORK(&gl->gl_delete, delete_work_func); + + mapping = gfs2_glock2aspace(gl); + if (mapping) { + mapping->a_ops = &gfs2_meta_aops; + mapping->host = s->s_bdev->bd_inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->private_data = NULL; + mapping->backing_dev_info = s->s_bdi; + mapping->writeback_index = 0; + } - if (op_done) { - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - gl->gl_req_bh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_lock_bucket(hash); + tmp = search_bucket(hash, sdp, &name); + if (tmp) { + spin_unlock_bucket(hash); + kfree(gl->gl_lksb.sb_lvbptr); + kmem_cache_free(cachep, gl); + atomic_dec(&sdp->sd_glock_disposal); + gl = tmp; + } else { + hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); + spin_unlock_bucket(hash); } - gfs2_glock_put(gl); + *glp = gl; - if (gh) - gfs2_holder_wake(gh); + return 0; } /** - * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock - * @gl: The glock in question - * @state: the requested state - * @flags: modifier flags to the lock call + * gfs2_holder_init - initialize a struct gfs2_holder in the default way + * @gl: the glock + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure * */ -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) +void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, + struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; - int flags = gh ? gh->gh_flags : 0; - unsigned state = gh ? gh->gh_state : gl->gl_demote_state; - const struct gfs2_glock_operations *glops = gl->gl_ops; - int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | - LM_FLAG_NOEXP | LM_FLAG_ANY | - LM_FLAG_PRIORITY); - unsigned int lck_ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); - gfs2_assert_warn(sdp, state != gl->gl_state); - + INIT_LIST_HEAD(&gh->gh_list); + gh->gh_gl = gl; + gh->gh_ip = (unsigned long)__builtin_return_address(0); + gh->gh_owner_pid = get_pid(task_pid(current)); + gh->gh_state = state; + gh->gh_flags = flags; + gh->gh_error = 0; + gh->gh_iflags = 0; gfs2_glock_hold(gl); - gl->gl_req_bh = xmote_bh; - - lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); - - if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR))) - return; - - if (lck_ret & LM_OUT_ASYNC) - gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC); - else - xmote_bh(gl, lck_ret); } /** - * drop_bh - Called after a lock module unlock completes - * @gl: the glock - * @ret: the return status + * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure * - * Doesn't wake up the process waiting on the struct gfs2_holder (if any) - * Doesn't drop the reference on the glock the top half took out + * Don't mess with the glock. * */ -static void drop_bh(struct gfs2_glock *gl, unsigned int ret) +void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh = gl->gl_req_gh; - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !ret); - - state_change(gl, LM_ST_UNLOCKED); - - if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - - if (gh) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - gh->gh_error = 0; - spin_unlock(&gl->gl_spin); - } - - spin_lock(&gl->gl_spin); - gfs2_demote_wake(gl); - gl->gl_req_gh = NULL; - gl->gl_req_bh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - gfs2_glock_put(gl); - - if (gh) - gfs2_holder_wake(gh); + gh->gh_state = state; + gh->gh_flags = flags; + gh->gh_iflags = 0; + gh->gh_ip = (unsigned long)__builtin_return_address(0); + if (gh->gh_owner_pid) + put_pid(gh->gh_owner_pid); + gh->gh_owner_pid = get_pid(task_pid(current)); } /** - * gfs2_glock_drop_th - call into the lock module to unlock a lock - * @gl: the glock + * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) + * @gh: the holder structure * */ -static void gfs2_glock_drop_th(struct gfs2_glock *gl) +void gfs2_holder_uninit(struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - unsigned int ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - - gfs2_glock_hold(gl); - gl->gl_req_bh = drop_bh; - - ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); - - if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR))) - return; - - if (!ret) - drop_bh(gl, ret); - else - gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC); + put_pid(gh->gh_owner_pid); + gfs2_glock_put(gh->gh_gl); + gh->gh_gl = NULL; + gh->gh_ip = 0; } /** - * do_cancels - cancel requests for locks stuck waiting on an expire flag - * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock + * gfs2_glock_holder_wait + * @word: unused * - * Don't cancel GL_NOCANCEL requests. + * This function and gfs2_glock_demote_wait both show up in the WCHAN + * field. Thus I've separated these otherwise identical functions in + * order to be more informative to the user. */ -static void do_cancels(struct gfs2_holder *gh) +static int gfs2_glock_holder_wait(void *word) { - struct gfs2_glock *gl = gh->gh_gl; - - spin_lock(&gl->gl_spin); - - while (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - if (gl->gl_req_bh && !(gl->gl_req_gh && - (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { - spin_unlock(&gl->gl_spin); - gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock); - msleep(100); - spin_lock(&gl->gl_spin); - } else { - spin_unlock(&gl->gl_spin); - msleep(100); - spin_lock(&gl->gl_spin); - } - } + schedule(); + return 0; +} - spin_unlock(&gl->gl_spin); +static int gfs2_glock_demote_wait(void *word) +{ + schedule(); + return 0; } /** - * glock_wait_internal - wait on a glock acquisition + * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder * * Returns: 0 on success */ -static int glock_wait_internal(struct gfs2_holder *gh) +int gfs2_glock_wait(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (test_bit(HIF_ABORTED, &gh->gh_iflags)) - return -EIO; - - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - if (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - list_del_init(&gh->gh_list); - gh->gh_error = GLR_TRYFAILED; - run_queue(gl); - spin_unlock(&gl->gl_spin); - return gh->gh_error; - } - spin_unlock(&gl->gl_spin); - } - - if (gh->gh_flags & LM_FLAG_PRIORITY) - do_cancels(gh); - - wait_on_holder(gh); - if (gh->gh_error) - return gh->gh_error; - - gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags)); - gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state, - gh->gh_flags)); - - if (test_bit(HIF_FIRST, &gh->gh_iflags)) { - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - - if (glops->go_lock) { - gh->gh_error = glops->go_lock(gh); - if (gh->gh_error) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - spin_unlock(&gl->gl_spin); - } - } - - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - gl->gl_req_bh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); - spin_unlock(&gl->gl_spin); - } + unsigned long time1 = jiffies; + might_sleep(); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); + if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ + /* Lengthen the minimum hold time. */ + gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + + GL_GLOCK_HOLD_INCR, + GL_GLOCK_MAX_HOLD); return gh->gh_error; } -static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct pid *pid) +/** + * handle_callback - process a demote request + * @gl: the glock + * @state: the state the caller wants us to change to + * + * There are only two requests that we are going to see in actual + * practise: LM_ST_SHARED and LM_ST_UNLOCKED + */ + +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote) { - struct gfs2_holder *gh; + int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; - list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner_pid == pid) - return gh; + set_bit(bit, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { + gl->gl_demote_state = state; + gl->gl_demote_time = jiffies; + } else if (gl->gl_demote_state != LM_ST_UNLOCKED && + gl->gl_demote_state != state) { + gl->gl_demote_state = LM_ST_UNLOCKED; } - - return NULL; + if (gl->gl_ops->go_callback) + gl->gl_ops->go_callback(gl, remote); + trace_gfs2_demote_rq(gl, remote); } -static void print_dbg(struct glock_iter *gi, const char *fmt, ...) +void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - if (gi) { - vsprintf(gi->string, fmt, args); - seq_printf(gi->seq, gi->string); + + if (seq) { + seq_vprintf(seq, fmt, args); + } else { + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("%pV", &vaf); } - else - vprintk(fmt, args); + va_end(args); } @@ -1081,50 +947,81 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...) * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add * + * Eventually we should move the recursive locking trap to a + * debugging option or something like that. This is the fast + * path and needs to have the minimum number of distractions. + * */ -static void add_to_queue(struct gfs2_holder *gh) +static inline void add_to_queue(struct gfs2_holder *gh) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_holder *existing; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct list_head *insert_pt = NULL; + struct gfs2_holder *gh2; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - if (!(gh->gh_flags & GL_FLOCK)) { - existing = find_holder_by_owner(&gl->gl_holders, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(existing->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - existing->gh_gl->gl_name.ln_type, - existing->gh_gl->gl_state); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(gh->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - gl->gl_name.ln_type, gl->gl_state); - BUG(); - } - - existing = find_holder_by_owner(&gl->gl_waiters3, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - BUG(); + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { + if (test_bit(GLF_LOCK, &gl->gl_flags)) + try_futile = !may_grant(gl, gh); + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + goto fail; + } + + list_for_each_entry(gh2, &gl->gl_holders, gh_list) { + if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && + (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) + goto trap_recursive; + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { +fail: + gh->gh_error = GLR_TRYFAILED; + gfs2_holder_wake(gh); + return; } + if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) + continue; + if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) + insert_pt = &gh2->gh_list; + } + set_bit(GLF_QUEUED, &gl->gl_flags); + trace_gfs2_glock_queue(gh, 1); + gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT); + gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT); + if (likely(insert_pt == NULL)) { + list_add_tail(&gh->gh_list, &gl->gl_holders); + if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) + goto do_cancel; + return; + } + list_add_tail(&gh->gh_list, insert_pt); +do_cancel: + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { + spin_unlock(&gl->gl_spin); + if (sdp->sd_lockstruct.ls_ops->lm_cancel) + sdp->sd_lockstruct.ls_ops->lm_cancel(gl); + spin_lock(&gl->gl_spin); } + return; - if (gh->gh_flags & LM_FLAG_PRIORITY) - list_add(&gh->gh_list, &gl->gl_waiters3); - else - list_add_tail(&gh->gh_list, &gl->gl_waiters3); +trap_recursive: + pr_err("original: %pSR\n", (void *)gh2->gh_ip); + pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", + gh2->gh_gl->gl_name.ln_type, gh2->gh_state); + pr_err("new: %pSR\n", (void *)gh->gh_ip); + pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", + gh->gh_gl->gl_name.ln_type, gh->gh_state); + gfs2_dump_glock(NULL, gl); + BUG(); } /** @@ -1142,24 +1039,26 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_sbd *sdp = gl->gl_sbd; int error = 0; -restart: - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { - set_bit(HIF_ABORTED, &gh->gh_iflags); + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; - } + + if (test_bit(GLF_LRU, &gl->gl_flags)) + gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_spin); add_to_queue(gh); - run_queue(gl); + if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + gl->gl_lockref.count++; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gl->gl_lockref.count--; + } + run_queue(gl, 1); spin_unlock(&gl->gl_spin); - if (!(gh->gh_flags & GL_ASYNC)) { - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - goto restart; - } - } + if (!(gh->gh_flags & GL_ASYNC)) + error = gfs2_glock_wait(gh); return error; } @@ -1173,48 +1072,7 @@ restart: int gfs2_glock_poll(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - int ready = 0; - - spin_lock(&gl->gl_spin); - - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) - ready = 1; - else if (list_empty(&gh->gh_list)) { - if (gh->gh_error == GLR_CANCELED) { - spin_unlock(&gl->gl_spin); - msleep(100); - if (gfs2_glock_nq(gh)) - return 1; - return 0; - } else - ready = 1; - } - - spin_unlock(&gl->gl_spin); - - return ready; -} - -/** - * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC - * @gh: the holder structure - * - * Returns: 0, GLR_TRYFAILED, or errno on failure - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - int error; - - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - gh->gh_flags &= ~GL_ASYNC; - error = gfs2_glock_nq(gh); - } - - return error; + return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } /** @@ -1228,31 +1086,39 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned delay = 0; + int fast_path = 0; + spin_lock(&gl->gl_spin); if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, false); - gfs2_glmutex_lock(gl); - - spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); - - if (list_empty(&gl->gl_holders)) { + if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { + GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); spin_unlock(&gl->gl_spin); glops->go_unlock(gh); spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); } - gl->gl_stamp = jiffies; + if (list_empty(&gl->gl_holders) && + !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + fast_path = 1; } + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); - clear_bit(GLF_LOCK, &gl->gl_flags); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); + if (likely(fast_path)) + return; gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && - !test_bit(GLF_DEMOTE, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + !test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) + delay = gl->gl_hold_time; if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1261,7 +1127,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** @@ -1282,7 +1149,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh) * @number: the lock number * @glops: the glock operations for the type of glock * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition + * @flags: modifier flags for the acquisition * @gh: the struct gfs2_holder * * Returns: errno @@ -1407,332 +1274,304 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) { - unsigned int x; - - for (x = 0; x < num_gh; x++) - gfs2_glock_dq(&ghs[x]); + while (num_gh--) + gfs2_glock_dq(&ghs[num_gh]); } -/** - * gfs2_glock_dq_uninit_m - release multiple glocks - * @num_gh: the number of structures - * @ghs: an array of struct gfs2_holder structures - * - */ - -void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) +void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) { - unsigned int x; + unsigned long delay = 0; + unsigned long holdtime; + unsigned long now = jiffies; - for (x = 0; x < num_gh; x++) - gfs2_glock_dq_uninit(&ghs[x]); + gfs2_glock_hold(gl); + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (test_bit(GLF_QUEUED, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) { + if (time_before(now, holdtime)) + delay = holdtime - now; + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + delay = gl->gl_hold_time; + } + + spin_lock(&gl->gl_spin); + handle_callback(gl, state, delay, true); + spin_unlock(&gl->gl_spin); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } /** - * gfs2_lvb_hold - attach a LVB from a glock + * gfs2_should_freeze - Figure out if glock should be frozen * @gl: The glock in question * + * Glocks are not frozen if (a) the result of the dlm operation is + * an error, (b) the locking operation was an unlock operation or + * (c) if there is a "noexp" flagged request anywhere in the queue + * + * Returns: 1 if freezing should occur, 0 otherwise */ -int gfs2_lvb_hold(struct gfs2_glock *gl) +static int gfs2_should_freeze(const struct gfs2_glock *gl) { - int error; + const struct gfs2_holder *gh; - gfs2_glmutex_lock(gl); + if (gl->gl_reply & ~LM_OUT_ST_MASK) + return 0; + if (gl->gl_target == LM_ST_UNLOCKED) + return 0; - if (!atomic_read(&gl->gl_lvb_count)) { - error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); - if (error) { - gfs2_glmutex_unlock(gl); - return error; - } - gfs2_glock_hold(gl); + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (LM_FLAG_NOEXP & gh->gh_flags) + return 0; } - atomic_inc(&gl->gl_lvb_count); - - gfs2_glmutex_unlock(gl); - return 0; + return 1; } /** - * gfs2_lvb_unhold - detach a LVB from a glock - * @gl: The glock in question + * gfs2_glock_complete - Callback used by locking + * @gl: Pointer to the glock + * @ret: The return value from the dlm * + * The gl_reply field is under the gl_spin lock so that it is ok + * to use a bitfield shared with other glock state fields. */ -void gfs2_lvb_unhold(struct gfs2_glock *gl) +void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { - gfs2_glock_hold(gl); - gfs2_glmutex_lock(gl); + struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; - gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); - if (atomic_dec_and_test(&gl->gl_lvb_count)) { - gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb); - gl->gl_lvb = NULL; - gfs2_glock_put(gl); + spin_lock(&gl->gl_spin); + gl->gl_reply = ret; + + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { + if (gfs2_should_freeze(gl)) { + set_bit(GLF_FROZEN, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + return; + } } - gfs2_glmutex_unlock(gl); - gfs2_glock_put(gl); + gl->gl_lockref.count++; + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } -static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, - unsigned int state) +static int glock_cmp(void *priv, struct list_head *a, struct list_head *b) { - struct gfs2_glock *gl; - unsigned long delay = 0; - unsigned long holdtime; - unsigned long now = jiffies; + struct gfs2_glock *gla, *glb; - gl = gfs2_glock_find(sdp, name); - if (!gl) - return; + gla = list_entry(a, struct gfs2_glock, gl_lru); + glb = list_entry(b, struct gfs2_glock, gl_lru); - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; - if (time_before(now, holdtime)) - delay = holdtime - now; + if (gla->gl_name.ln_number > glb->gl_name.ln_number) + return 1; + if (gla->gl_name.ln_number < glb->gl_name.ln_number) + return -1; - handle_callback(gl, state, 1, delay); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) - gfs2_glock_put(gl); + return 0; } /** - * gfs2_glock_cb - Callback used by locking module - * @sdp: Pointer to the superblock - * @type: Type of callback - * @data: Type dependent data pointer + * gfs2_dispose_glock_lru - Demote a list of glocks + * @list: The list to dispose of + * + * Disposing of glocks may involve disk accesses, so that here we sort + * the glocks by number (i.e. disk location of the inodes) so that if + * there are any such accesses, they'll be sent in order (mostly). * - * Called by the locking module when it wants to tell us something. - * Either we need to drop a lock, one of our ASYNC requests completed, or - * a journal from another client needs to be recovered. + * Must be called under the lru_lock, but may drop and retake this + * lock. While the lru_lock is dropped, entries may vanish from the + * list, but no new entries will appear on the list (since it is + * private) */ -void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) +static void gfs2_dispose_glock_lru(struct list_head *list) +__releases(&lru_lock) +__acquires(&lru_lock) { - struct gfs2_sbd *sdp = cb_data; - - switch (type) { - case LM_CB_NEED_E: - blocking_cb(sdp, data, LM_ST_UNLOCKED); - return; - - case LM_CB_NEED_D: - blocking_cb(sdp, data, LM_ST_DEFERRED); - return; - - case LM_CB_NEED_S: - blocking_cb(sdp, data, LM_ST_SHARED); - return; + struct gfs2_glock *gl; - case LM_CB_ASYNC: { - struct lm_async_cb *async = data; - struct gfs2_glock *gl; + list_sort(NULL, list, glock_cmp); - down_read(&gfs2_umount_flush_sem); - gl = gfs2_glock_find(sdp, &async->lc_name); - if (gfs2_assert_warn(sdp, gl)) - return; - if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) - gl->gl_req_bh(gl, async->lc_ret); + while(!list_empty(list)) { + gl = list_entry(list->next, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: + list_add(&gl->gl_lru, &lru_list); + atomic_inc(&lru_count); + continue; + } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } + clear_bit(GLF_LRU, &gl->gl_flags); + gl->gl_lockref.count++; + if (demote_ok(gl)) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put(gl); - up_read(&gfs2_umount_flush_sem); - return; - } - - case LM_CB_NEED_RECOVERY: - gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data); - if (sdp->sd_recoverd_process) - wake_up_process(sdp->sd_recoverd_process); - return; - - case LM_CB_DROPLOCKS: - gfs2_gl_hash_clear(sdp, NO_WAIT); - gfs2_quota_scan(sdp); - return; - - default: - gfs2_assert_warn(sdp, 0); - return; + gl->gl_lockref.count--; + spin_unlock(&gl->gl_spin); + cond_resched_lock(&lru_lock); } } /** - * demote_ok - Check to see if it's ok to unlock a glock - * @gl: the glock + * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote + * @nr: The number of entries to scan * - * Returns: 1 if it's ok + * This function selects the entries on the LRU which are able to + * be demoted, and then kicks off the process by calling + * gfs2_dispose_glock_lru() above. */ -static int demote_ok(struct gfs2_glock *gl) +static long gfs2_scan_glock_lru(int nr) { - const struct gfs2_glock_operations *glops = gl->gl_ops; - int demote = 1; + struct gfs2_glock *gl; + LIST_HEAD(skipped); + LIST_HEAD(dispose); + long freed = 0; + + spin_lock(&lru_lock); + while ((nr-- >= 0) && !list_empty(&lru_list)) { + gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); + + /* Test for being demotable */ + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { + list_move(&gl->gl_lru, &dispose); + atomic_dec(&lru_count); + freed++; + continue; + } - if (test_bit(GLF_STICKY, &gl->gl_flags)) - demote = 0; - else if (glops->go_demote_ok) - demote = glops->go_demote_ok(gl); + list_move(&gl->gl_lru, &skipped); + } + list_splice(&skipped, &lru_list); + if (!list_empty(&dispose)) + gfs2_dispose_glock_lru(&dispose); + spin_unlock(&lru_lock); - return demote; + return freed; } -/** - * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - */ - -void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { - struct gfs2_sbd *sdp = gl->gl_sbd; - - spin_lock(&sdp->sd_reclaim_lock); - if (list_empty(&gl->gl_reclaim)) { - gfs2_glock_hold(gl); - list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); - atomic_inc(&sdp->sd_reclaim_count); - } - spin_unlock(&sdp->sd_reclaim_lock); + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} - wake_up(&sdp->sd_reclaim_wq); +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&lru_count)); } +static struct shrinker glock_shrinker = { + .seeks = DEFAULT_SEEKS, + .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, +}; + /** - * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list + * examine_bucket - Call a function for glock in a hash bucket + * @examiner: the function * @sdp: the filesystem - * - * Called from gfs2_glockd() glock reclaim daemon, or when promoting a - * different glock and we notice that there are a lot of glocks in the - * reclaim list. + * @bucket: the bucket * */ -void gfs2_reclaim_glock(struct gfs2_sbd *sdp) +static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, + unsigned int hash) { struct gfs2_glock *gl; + struct hlist_bl_head *head = &gl_hash_table[hash]; + struct hlist_bl_node *pos; - spin_lock(&sdp->sd_reclaim_lock); - if (list_empty(&sdp->sd_reclaim_list)) { - spin_unlock(&sdp->sd_reclaim_lock); - return; + rcu_read_lock(); + hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { + if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) + examiner(gl); } - gl = list_entry(sdp->sd_reclaim_list.next, - struct gfs2_glock, gl_reclaim); - list_del_init(&gl->gl_reclaim); - spin_unlock(&sdp->sd_reclaim_lock); - - atomic_dec(&sdp->sd_reclaim_count); - atomic_inc(&sdp->sd_reclaimed); + rcu_read_unlock(); + cond_resched(); +} - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); - } +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) +{ + unsigned x; - gfs2_glock_put(gl); + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(examiner, sdp, x); } + /** - * examine_bucket - Call a function for glock in a hash bucket - * @examiner: the function - * @sdp: the filesystem - * @bucket: the bucket + * thaw_glock - thaw out a glock which has an unprocessed reply waiting + * @gl: The glock to thaw * - * Returns: 1 if the bucket has entries */ -static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, - unsigned int hash) +static void thaw_glock(struct gfs2_glock *gl) { - struct gfs2_glock *gl, *prev = NULL; - int has_entries = 0; - struct hlist_head *head = &gl_hash_table[hash].hb_list; - - read_lock(gl_lock_addr(hash)); - /* Can't use hlist_for_each_entry - don't want prefetch here */ - if (hlist_empty(head)) + if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) goto out; - gl = list_entry(head->first, struct gfs2_glock, gl_list); - while(1) { - if (!sdp || gl->gl_sbd == sdp) { - gfs2_glock_hold(gl); - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); - prev = gl; - examiner(gl); - has_entries = 1; - read_lock(gl_lock_addr(hash)); - } - if (gl->gl_list.next == NULL) - break; - gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list); - } + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) { out: - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); - cond_resched(); - return has_entries; + gfs2_glock_put(gl); + } } /** - * scan_glock - look at a glock and see if we can reclaim it + * clear_glock - look at a glock and see if we can free it from glock cache * @gl: the glock to look at * */ -static void scan_glock(struct gfs2_glock *gl) +static void clear_glock(struct gfs2_glock *gl) { - if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) - return; - - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - goto out_schedule; - gfs2_glmutex_unlock(gl); - } - return; + gfs2_glock_remove_from_lru(gl); -out_schedule: - gfs2_glmutex_unlock(gl); - gfs2_glock_schedule_for_reclaim(gl); + spin_lock(&gl->gl_spin); + if (gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + spin_unlock(&gl->gl_spin); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } /** - * clear_glock - look at a glock and see if we can free it from glock cache - * @gl: the glock to look at + * gfs2_glock_thaw - Thaw any frozen glocks + * @sdp: The super block * */ -static void clear_glock(struct gfs2_glock *gl) +void gfs2_glock_thaw(struct gfs2_sbd *sdp) { - struct gfs2_sbd *sdp = gl->gl_sbd; - int released; - - spin_lock(&sdp->sd_reclaim_lock); - if (!list_empty(&gl->gl_reclaim)) { - list_del_init(&gl->gl_reclaim); - atomic_dec(&sdp->sd_reclaim_count); - spin_unlock(&sdp->sd_reclaim_lock); - released = gfs2_glock_put(gl); - gfs2_assert(sdp, !released); - } else { - spin_unlock(&sdp->sd_reclaim_lock); - } + glock_hash_walk(thaw_glock, sdp); +} - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); - } +static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +{ + spin_lock(&gl->gl_spin); + gfs2_dump_glock(seq, gl); + spin_unlock(&gl->gl_spin); +} + +static void dump_glock_func(struct gfs2_glock *gl) +{ + dump_glock(NULL, gl); } /** @@ -1740,434 +1579,411 @@ static void clear_glock(struct gfs2_glock *gl) * @sdp: the filesystem * @wait: wait until it's all gone * - * Called when unmounting the filesystem, or when inter-node lock manager - * requests DROPLOCKS because it is running out of capacity. + * Called when unmounting the filesystem. */ -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) +void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { - unsigned long t; - unsigned int x; - int cont; - - t = jiffies; - - for (;;) { - cont = 0; - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - if (examine_bucket(clear_glock, sdp, x)) - cont = 1; - } + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); + flush_workqueue(glock_workqueue); + glock_hash_walk(clear_glock, sdp); + flush_workqueue(glock_workqueue); + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); + glock_hash_walk(dump_glock_func, sdp); +} - if (!wait || !cont) - break; +void gfs2_glock_finish_truncate(struct gfs2_inode *ip) +{ + struct gfs2_glock *gl = ip->i_gl; + int ret; - if (time_after_eq(jiffies, - t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) { - fs_warn(sdp, "Unmount seems to be stalled. " - "Dumping lock state...\n"); - gfs2_dump_lockstate(sdp); - t = jiffies; - } + ret = gfs2_truncatei_resume(ip); + gfs2_assert_withdraw(gl->gl_sbd, ret == 0); - down_write(&gfs2_umount_flush_sem); - invalidate_inodes(sdp->sd_vfs); - up_write(&gfs2_umount_flush_sem); - msleep(10); - } + spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); + run_queue(gl, 1); + spin_unlock(&gl->gl_spin); } -/* - * Diagnostic routines to help debug distributed deadlock - */ - -static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, - unsigned long address) +static const char *state2str(unsigned state) { - char buffer[KSYM_SYMBOL_LEN]; - - sprint_symbol(buffer, address); - print_dbg(gi, fmt, buffer); + switch(state) { + case LM_ST_UNLOCKED: + return "UN"; + case LM_ST_SHARED: + return "SH"; + case LM_ST_DEFERRED: + return "DF"; + case LM_ST_EXCLUSIVE: + return "EX"; + } + return "??"; +} + +static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) +{ + char *p = buf; + if (flags & LM_FLAG_TRY) + *p++ = 't'; + if (flags & LM_FLAG_TRY_1CB) + *p++ = 'T'; + if (flags & LM_FLAG_NOEXP) + *p++ = 'e'; + if (flags & LM_FLAG_ANY) + *p++ = 'A'; + if (flags & LM_FLAG_PRIORITY) + *p++ = 'p'; + if (flags & GL_ASYNC) + *p++ = 'a'; + if (flags & GL_EXACT) + *p++ = 'E'; + if (flags & GL_NOCACHE) + *p++ = 'c'; + if (test_bit(HIF_HOLDER, &iflags)) + *p++ = 'H'; + if (test_bit(HIF_WAIT, &iflags)) + *p++ = 'W'; + if (test_bit(HIF_FIRST, &iflags)) + *p++ = 'F'; + *p = 0; + return buf; } /** * dump_holder - print information about a glock holder - * @str: a string naming the type of holder + * @seq: the seq_file struct * @gh: the glock holder * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(struct glock_iter *gi, char *str, - struct gfs2_holder *gh) +static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { - unsigned int x; - struct task_struct *gh_owner; + struct task_struct *gh_owner = NULL; + char flags_buf[32]; - print_dbg(gi, " %s\n", str); - if (gh->gh_owner_pid) { - print_dbg(gi, " owner = %ld ", - (long)pid_nr(gh->gh_owner_pid)); + rcu_read_lock(); + if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - if (gh_owner) - print_dbg(gi, "(%s)\n", gh_owner->comm); - else - print_dbg(gi, "(ended)\n"); - } else - print_dbg(gi, " owner = -1\n"); - print_dbg(gi, " gh_state = %u\n", gh->gh_state); - print_dbg(gi, " gh_flags ="); - for (x = 0; x < 32; x++) - if (gh->gh_flags & (1 << x)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - print_dbg(gi, " error = %d\n", gh->gh_error); - print_dbg(gi, " gh_iflags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &gh->gh_iflags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); - - return 0; + gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", + state2str(gh->gh_state), + hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), + gh->gh_error, + gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, + gh_owner ? gh_owner->comm : "(ended)", + (void *)gh->gh_ip); + rcu_read_unlock(); +} + +static const char *gflags2str(char *buf, const struct gfs2_glock *gl) +{ + const unsigned long *gflags = &gl->gl_flags; + char *p = buf; + + if (test_bit(GLF_LOCK, gflags)) + *p++ = 'l'; + if (test_bit(GLF_DEMOTE, gflags)) + *p++ = 'D'; + if (test_bit(GLF_PENDING_DEMOTE, gflags)) + *p++ = 'd'; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) + *p++ = 'p'; + if (test_bit(GLF_DIRTY, gflags)) + *p++ = 'y'; + if (test_bit(GLF_LFLUSH, gflags)) + *p++ = 'f'; + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) + *p++ = 'i'; + if (test_bit(GLF_REPLY_PENDING, gflags)) + *p++ = 'r'; + if (test_bit(GLF_INITIAL, gflags)) + *p++ = 'I'; + if (test_bit(GLF_FROZEN, gflags)) + *p++ = 'F'; + if (test_bit(GLF_QUEUED, gflags)) + *p++ = 'q'; + if (test_bit(GLF_LRU, gflags)) + *p++ = 'L'; + if (gl->gl_object) + *p++ = 'o'; + if (test_bit(GLF_BLOCKING, gflags)) + *p++ = 'b'; + *p = 0; + return buf; } /** - * dump_inode - print information about an inode - * @ip: the inode + * gfs2_dump_glock - print information about a glock + * @seq: The seq_file struct + * @gl: the glock + * + * The file format is as follows: + * One line per object, capital letters are used to indicate objects + * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, + * other objects are indented by a single space and follow the glock to + * which they are related. Fields are indicated by lower case letters + * followed by a colon and the field value, except for strings which are in + * [] so that its possible to see if they are composed of spaces for + * example. The field's are n = number (id of the object), f = flags, + * t = type, s = state, r = refcount, e = error, p = pid. * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) +void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { - unsigned int x; - - print_dbg(gi, " Inode:\n"); - print_dbg(gi, " num = %llu/%llu\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr); - print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); - print_dbg(gi, " i_flags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &ip->i_flags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); + const struct gfs2_glock_operations *glops = gl->gl_ops; + unsigned long long dtime; + const struct gfs2_holder *gh; + char gflags_buf[32]; + + dtime = jiffies - gl->gl_demote_time; + dtime *= 1000000/HZ; /* demote time in uSec */ + if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) + dtime = 0; + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", + state2str(gl->gl_state), + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + gflags2str(gflags_buf, gl), + state2str(gl->gl_target), + state2str(gl->gl_demote_state), dtime, + atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_revokes), + (int)gl->gl_lockref.count, gl->gl_hold_time); + + list_for_each_entry(gh, &gl->gl_holders, gh_list) + dump_holder(seq, gh); + + if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) + glops->go_dump(seq, gl); +} + +static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glock *gl = iter_ptr; + + seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + (long long)gl->gl_stats.stats[GFS2_LKS_SRTT], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], + (long long)gl->gl_stats.stats[GFS2_LKS_SIRT], + (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], + (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], + (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); return 0; } -/** - * dump_glock - print information about a glock - * @gl: the glock - * @count: where we are in the buffer - * - * Returns: 0 on success, -ENOBUFS when we run out of space - */ - -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) -{ - struct gfs2_holder *gh; - unsigned int x; - int error = -ENOBUFS; - struct task_struct *gl_owner; - - spin_lock(&gl->gl_spin); - - print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - print_dbg(gi, " gl_flags ="); - for (x = 0; x < 32; x++) { - if (test_bit(x, &gl->gl_flags)) - print_dbg(gi, " %u", x); - } - if (!test_bit(GLF_LOCK, &gl->gl_flags)) - print_dbg(gi, " (unlocked)"); - print_dbg(gi, " \n"); - print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); - print_dbg(gi, " gl_state = %u\n", gl->gl_state); - if (gl->gl_owner_pid) { - gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); - if (gl_owner) - print_dbg(gi, " gl_owner = pid %d (%s)\n", - pid_nr(gl->gl_owner_pid), gl_owner->comm); - else - print_dbg(gi, " gl_owner = %d (ended)\n", - pid_nr(gl->gl_owner_pid)); - } else - print_dbg(gi, " gl_owner = -1\n"); - print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); - print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); - print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); - print_dbg(gi, " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); - if (gl->gl_aspace) - print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); - else - print_dbg(gi, " aspace = no\n"); - print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); - if (gl->gl_req_gh) { - error = dump_holder(gi, "Request", gl->gl_req_gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder(gi, "Holder", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder(gi, "Waiter1", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder(gi, "Waiter3", gh); - if (error) - goto out; - } - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", - gl->gl_demote_state, (unsigned long long) - (jiffies - gl->gl_demote_time)*(1000000/HZ)); - } - if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { - if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gi, gl->gl_object); - if (error) - goto out; - } else { - error = -ENOBUFS; - print_dbg(gi, " Inode: busy\n"); - } - } - - error = 0; +static const char *gfs2_gltype[] = { + "type", + "reserved", + "nondisk", + "inode", + "rgrp", + "meta", + "iopen", + "flock", + "plock", + "quota", + "journal", +}; -out: - spin_unlock(&gl->gl_spin); - return error; -} +static const char *gfs2_stype[] = { + [GFS2_LKS_SRTT] = "srtt", + [GFS2_LKS_SRTTVAR] = "srttvar", + [GFS2_LKS_SRTTB] = "srttb", + [GFS2_LKS_SRTTVARB] = "srttvarb", + [GFS2_LKS_SIRT] = "sirt", + [GFS2_LKS_SIRTVAR] = "sirtvar", + [GFS2_LKS_DCOUNT] = "dlm", + [GFS2_LKS_QCOUNT] = "queue", +}; -/** - * gfs2_dump_lockstate - print out the current lockstate - * @sdp: the filesystem - * @ub: the buffer to copy the information into - * - * If @ub is NULL, dump the lockstate to the console. - * - */ +#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) +static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) { - struct gfs2_glock *gl; - struct hlist_node *h; - unsigned int x; - int error = 0; + struct gfs2_glock_iter *gi = seq->private; + struct gfs2_sbd *sdp = gi->sdp; + unsigned index = gi->hash >> 3; + unsigned subindex = gi->hash & 0x07; + s64 value; + int i; - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - - read_lock(gl_lock_addr(x)); + if (index == 0 && subindex != 0) + return 0; - hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) { - if (gl->gl_sbd != sdp) - continue; + seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], + (index == 0) ? "cpu": gfs2_stype[subindex]); - error = dump_glock(NULL, gl); - if (error) - break; + for_each_possible_cpu(i) { + const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); + if (index == 0) { + value = i; + } else { + value = lkstats->lkstats[index - 1].stats[subindex]; } - - read_unlock(gl_lock_addr(x)); - - if (error) - break; + seq_printf(seq, " %15lld", (long long)value); } - - - return error; -} - -/** - * gfs2_scand - Look for cached glocks and inodes to toss from memory - * @sdp: Pointer to GFS2 superblock - * - * One of these daemons runs, finding candidates to add to sd_reclaim_list. - * See gfs2_glockd() - */ - -static int gfs2_scand(void *data) -{ - unsigned x; - unsigned delay; - - while (!kthread_should_stop()) { - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(scan_glock, NULL, x); - if (freezing(current)) - refrigerator(); - delay = scand_secs; - if (delay < 1) - delay = 1; - schedule_timeout_interruptible(delay * HZ); - } - + seq_putc(seq, '\n'); return 0; } - - int __init gfs2_glock_init(void) { unsigned i; for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); - } -#ifdef GL_HASH_LOCK_SZ - for(i = 0; i < GL_HASH_LOCK_SZ; i++) { - rwlock_init(&gl_hash_locks[i]); + INIT_HLIST_BL_HEAD(&gl_hash_table[i]); } -#endif - scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand"); - if (IS_ERR(scand_process)) - return PTR_ERR(scand_process); - - glock_workqueue = create_workqueue("glock_workqueue"); - if (IS_ERR(glock_workqueue)) { - kthread_stop(scand_process); - return PTR_ERR(glock_workqueue); + glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | + WQ_HIGHPRI | WQ_FREEZABLE, 0); + if (!glock_workqueue) + return -ENOMEM; + gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", + WQ_MEM_RECLAIM | WQ_FREEZABLE, + 0); + if (!gfs2_delete_workqueue) { + destroy_workqueue(glock_workqueue); + return -ENOMEM; } + register_shrinker(&glock_shrinker); + return 0; } void gfs2_glock_exit(void) { + unregister_shrinker(&glock_shrinker); destroy_workqueue(glock_workqueue); - kthread_stop(scand_process); + destroy_workqueue(gfs2_delete_workqueue); } -module_param(scand_secs, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); - -static int gfs2_glock_iter_next(struct glock_iter *gi) +static inline struct gfs2_glock *glock_hash_chain(unsigned hash) { - struct gfs2_glock *gl; - -restart: - read_lock(gl_lock_addr(gi->hash)); - gl = gi->gl; - if (gl) { - gi->gl = hlist_entry(gl->gl_list.next, - struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); - } - read_unlock(gl_lock_addr(gi->hash)); - if (gl) - gfs2_glock_put(gl); - if (gl && gi->gl == NULL) - gi->hash++; - while(gi->gl == NULL) { - if (gi->hash >= GFS2_GL_HASH_SIZE) - return 1; - read_lock(gl_lock_addr(gi->hash)); - gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, - struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); - read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; - } - - if (gi->sdp != gi->gl->gl_sbd) - goto restart; - - return 0; + return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), + struct gfs2_glock, gl_list); } -static void gfs2_glock_iter_free(struct glock_iter *gi) +static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) { - if (gi->gl) - gfs2_glock_put(gi->gl); - kfree(gi); + return hlist_bl_entry(rcu_dereference(gl->gl_list.next), + struct gfs2_glock, gl_list); } -static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) +static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { - struct glock_iter *gi; - - gi = kmalloc(sizeof (*gi), GFP_KERNEL); - if (!gi) - return NULL; - - gi->sdp = sdp; - gi->hash = 0; - gi->seq = NULL; - gi->gl = NULL; - memset(gi->string, 0, sizeof(gi->string)); + struct gfs2_glock *gl; - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); - return NULL; - } + do { + gl = gi->gl; + if (gl) { + gi->gl = glock_hash_next(gl); + gi->nhash++; + } else { + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } + gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; + } + while (gi->gl == NULL) { + gi->hash++; + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } + gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; + } + /* Skip entries for other sb and dead entries */ + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); - return gi; + return 0; } -static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { - struct glock_iter *gi; + struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - gi = gfs2_glock_iter_init(file->private); - if (!gi) - return NULL; + if (gi->last_pos <= *pos) + n = gi->nhash + (*pos - gi->last_pos); + else + gi->hash = 0; + + gi->nhash = 0; + rcu_read_lock(); - while(n--) { - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + do { + if (gfs2_glock_iter_next(gi)) return NULL; - } - } + } while (n--); - return gi; + gi->last_pos = *pos; + return gi->gl; } -static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, +static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { - struct glock_iter *gi = iter_ptr; + struct gfs2_glock_iter *gi = seq->private; (*pos)++; - - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + gi->last_pos = *pos; + if (gfs2_glock_iter_next(gi)) return NULL; - } - return gi; + return gi->gl; +} + +static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glock_iter *gi = seq->private; + + if (gi->gl) + rcu_read_unlock(); + gi->gl = NULL; } -static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - struct glock_iter *gi = iter_ptr; - if (gi) - gfs2_glock_iter_free(gi); + dump_glock(seq, iter_ptr); + return 0; } -static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) { - struct glock_iter *gi = iter_ptr; + struct gfs2_glock_iter *gi = seq->private; - gi->seq = file; - dump_glock(gi, gi->gl); + gi->hash = *pos; + if (*pos >= GFS2_NR_SBSTATS) + return NULL; + preempt_disable(); + return SEQ_START_TOKEN; +} - return 0; +static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, + loff_t *pos) +{ + struct gfs2_glock_iter *gi = seq->private; + (*pos)++; + gi->hash++; + if (gi->hash >= GFS2_NR_SBSTATS) { + preempt_enable(); + return NULL; + } + return SEQ_START_TOKEN; +} + +static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) +{ + preempt_enable(); } static const struct seq_operations gfs2_glock_seq_ops = { @@ -2177,27 +1993,86 @@ static const struct seq_operations gfs2_glock_seq_ops = { .show = gfs2_glock_seq_show, }; -static int gfs2_debugfs_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int ret; +static const struct seq_operations gfs2_glstats_seq_ops = { + .start = gfs2_glock_seq_start, + .next = gfs2_glock_seq_next, + .stop = gfs2_glock_seq_stop, + .show = gfs2_glstats_seq_show, +}; - ret = seq_open(file, &gfs2_glock_seq_ops); - if (ret) - return ret; +static const struct seq_operations gfs2_sbstats_seq_ops = { + .start = gfs2_sbstats_seq_start, + .next = gfs2_sbstats_seq_next, + .stop = gfs2_sbstats_seq_stop, + .show = gfs2_sbstats_seq_show, +}; - seq = file->private_data; - seq->private = inode->i_private; +#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) - return 0; +static int gfs2_glocks_open(struct inode *inode, struct file *file) +{ + int ret = seq_open_private(file, &gfs2_glock_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; + } + return ret; +} + +static int gfs2_glstats_open(struct inode *inode, struct file *file) +{ + int ret = seq_open_private(file, &gfs2_glstats_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; + } + return ret; } -static const struct file_operations gfs2_debug_fops = { +static int gfs2_sbstats_open(struct inode *inode, struct file *file) +{ + int ret = seq_open_private(file, &gfs2_sbstats_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + } + return ret; +} + +static const struct file_operations gfs2_glocks_fops = { + .owner = THIS_MODULE, + .open = gfs2_glocks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static const struct file_operations gfs2_glstats_fops = { .owner = THIS_MODULE, - .open = gfs2_debugfs_open, + .open = gfs2_glstats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, +}; + +static const struct file_operations gfs2_sbstats_fops = { + .owner = THIS_MODULE, + .open = gfs2_sbstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, }; int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) @@ -2208,20 +2083,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, - &gfs2_debug_fops); + &gfs2_glocks_fops); if (!sdp->debugfs_dentry_glocks) - return -ENOMEM; + goto fail; + + sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glstats_fops); + if (!sdp->debugfs_dentry_glstats) + goto fail; + + sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_sbstats_fops); + if (!sdp->debugfs_dentry_sbstats) + goto fail; return 0; +fail: + gfs2_delete_debugfs_file(sdp); + return -ENOMEM; } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) { - if (sdp && sdp->debugfs_dir) { + if (sdp->debugfs_dir) { if (sdp->debugfs_dentry_glocks) { debugfs_remove(sdp->debugfs_dentry_glocks); sdp->debugfs_dentry_glocks = NULL; } + if (sdp->debugfs_dentry_glstats) { + debugfs_remove(sdp->debugfs_dentry_glstats); + sdp->debugfs_dentry_glstats = NULL; + } + if (sdp->debugfs_dentry_sbstats) { + debugfs_remove(sdp->debugfs_dentry_sbstats); + sdp->debugfs_dentry_sbstats = NULL; + } debugfs_remove(sdp->debugfs_dir); sdp->debugfs_dir = NULL; } |
