From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jul 2012 11:53:34 -0700 Subject: libceph: fix messenger retry In ancient times, the messenger could both initiate and accept connections. An artifact if that was data structures to store/process an incoming ceph_msg_connect request and send an outgoing ceph_msg_connect_reply. Sadly, the negotiation code was referencing those structures and ignoring important information (like the peer's connect_seq) from the correct ones. Among other things, this fixes tight reconnect loops where the server sends RETRY_SESSION and we (the client) retries with the same connect_seq as last time. This bug pretty easily triggered by injecting socket failures on the MDS and running some fs workload like workunits/direct_io/test_sync_io. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d..44c87e731e9 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection { /* connection negotiation temps */ char in_banner[CEPH_BANNER_MAX_LEN]; - union { - struct { /* outgoing connection */ - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - }; - struct { /* incoming */ - struct ceph_msg_connect in_connect; - struct ceph_msg_connect_reply out_reply; - }; - }; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; struct ceph_entity_addr actual_peer_addr; /* message out temps */ -- cgit v1.2.3-18-g5258