diff options
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 22 | ||||
-rw-r--r-- | fs/ocfs2/cluster/masklog.h | 22 | ||||
-rw-r--r-- | fs/ocfs2/cluster/ocfs2_heartbeat.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 16 |
4 files changed, 51 insertions, 10 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 21f38accd03..504595d6cf6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -54,7 +54,7 @@ static DECLARE_RWSEM(o2hb_callback_sem); * multiple hb threads are watching multiple regions. A node is live * whenever any of the threads sees activity from the node in its region. */ -static spinlock_t o2hb_live_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(o2hb_live_lock); static struct list_head o2hb_live_slots[O2NM_MAX_NODES]; static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; static LIST_HEAD(o2hb_node_events); @@ -517,6 +517,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg, hb_block->hb_seq = cpu_to_le64(cputime); hb_block->hb_node = node_num; hb_block->hb_generation = cpu_to_le64(generation); + hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS); /* This step must always happen last! */ hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, @@ -645,6 +646,8 @@ static int o2hb_check_slot(struct o2hb_region *reg, struct o2nm_node *node; struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block; u64 cputime; + unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; + unsigned int slot_dead_ms; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); @@ -733,6 +736,23 @@ fire_callbacks: &o2hb_live_slots[slot->ds_node_num]); slot->ds_equal_samples = 0; + + /* We want to be sure that all nodes agree on the + * number of milliseconds before a node will be + * considered dead. The self-fencing timeout is + * computed from this value, and a discrepancy might + * result in heartbeat calling a node dead when it + * hasn't self-fenced yet. */ + slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms); + if (slot_dead_ms && slot_dead_ms != dead_ms) { + /* TODO: Perhaps we can fail the region here. */ + mlog(ML_ERROR, "Node %d on device %s has a dead count " + "of %u ms, but our count is %u ms.\n" + "Please double check your configuration values " + "for 'O2CB_HEARTBEAT_THRESHOLD'\n", + slot->ds_node_num, reg->hr_dev_name, slot_dead_ms, + dead_ms); + } goto out; } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 73edad78253..a42628ba9dd 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -123,6 +123,17 @@ #define MLOG_MASK_PREFIX 0 #endif +/* + * When logging is disabled, force the bit test to 0 for anything other + * than errors and notices, allowing gcc to remove the code completely. + * When enabled, allow all masks. + */ +#if defined(CONFIG_OCFS2_DEBUG_MASKLOG) +#define ML_ALLOWED_BITS ~0 +#else +#define ML_ALLOWED_BITS (ML_ERROR|ML_NOTICE) +#endif + #define MLOG_MAX_BITS 64 struct mlog_bits { @@ -187,7 +198,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog(mask, fmt, args...) do { \ u64 __m = MLOG_MASK_PREFIX | (mask); \ - if (__mlog_test_u64(__m, mlog_and_bits) && \ + if ((__m & ML_ALLOWED_BITS) && \ + __mlog_test_u64(__m, mlog_and_bits) && \ !__mlog_test_u64(__m, mlog_not_bits)) { \ if (__m & ML_ERROR) \ __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ @@ -204,6 +216,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ } while (0) +#if defined(CONFIG_OCFS2_DEBUG_MASKLOG) #define mlog_entry(fmt, args...) do { \ mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \ } while (0) @@ -247,6 +260,13 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog_exit_void() do { \ mlog(ML_EXIT, "EXIT\n"); \ } while (0) +#else +#define mlog_entry(...) do { } while (0) +#define mlog_entry_void(...) do { } while (0) +#define mlog_exit(...) do { } while (0) +#define mlog_exit_ptr(...) do { } while (0) +#define mlog_exit_void(...) do { } while (0) +#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */ #define mlog_bug_on_msg(cond, fmt, args...) do { \ if (cond) { \ diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h index 94096069cb4..3f4151da970 100644 --- a/fs/ocfs2/cluster/ocfs2_heartbeat.h +++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h @@ -32,6 +32,7 @@ struct o2hb_disk_heartbeat_block { __u8 hb_pad1[3]; __le32 hb_cksum; __le64 hb_generation; + __le32 hb_dead_ms; }; #endif /* _OCFS2_HEARTBEAT_H */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0f60cc0d398..b650efa8c8b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -108,7 +108,7 @@ ##args); \ } while (0) -static rwlock_t o2net_handler_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(o2net_handler_lock); static struct rb_root o2net_handler_tree = RB_ROOT; static struct o2net_node o2net_nodes[O2NM_MAX_NODES]; @@ -396,8 +396,8 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - mlog(ML_NOTICE, "no longer connected to " SC_NODEF_FMT "\n", - SC_NODEF_ARGS(old_sc)); + printk(KERN_INFO "o2net: no longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -409,10 +409,10 @@ static void o2net_set_nn_state(struct o2net_node *nn, * the only way to start connecting again is to down * heartbeat and bring it back up. */ cancel_delayed_work(&nn->nn_connect_expired); - mlog(ML_NOTICE, "%s " SC_NODEF_FMT "\n", - o2nm_this_node() > sc->sc_node->nd_num ? - "connected to" : "accepted connection from", - SC_NODEF_ARGS(sc)); + printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", + o2nm_this_node() > sc->sc_node->nd_num ? + "connected to" : "accepted connection from", + SC_NODEF_ARGS(sc)); } /* trigger the connecting worker func as long as we're not valid, @@ -1280,7 +1280,7 @@ static void o2net_idle_timer(unsigned long data) do_gettimeofday(&now); - mlog(ML_NOTICE, "connection to " SC_NODEF_FMT " has been idle for 10 " + printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); mlog(ML_NOTICE, "here are some times that might help debug the " "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |