aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_int.h
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2012-11-09 14:18:43 +0100
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-09 14:20:23 +0100
commit986836503e49ccf7e84b813715d344964ec93566 (patch)
treeb3bea7428efde5b77096cef80e5b6bfee494cc12 /drivers/block/drbd/drbd_int.h
parentccae7868b0c5697508a541c531cf96b361d62c1c (diff)
parent328e0f125bf41f4f33f684db22015f92cb44fe56 (diff)
Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
-rw-r--r--drivers/block/drbd/drbd_int.h1398
1 files changed, 601 insertions, 797 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 277c69c9465..ef72a72814c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -39,9 +39,13 @@
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
+#include <linux/idr.h>
#include <net/tcp.h>
#include <linux/lru_cache.h>
#include <linux/prefetch.h>
+#include <linux/drbd_genl_api.h>
+#include <linux/drbd.h>
+#include "drbd_state.h"
#ifdef __CHECKER__
# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
@@ -61,7 +65,6 @@
extern unsigned int minor_count;
extern bool disable_sendpage;
extern bool allow_oos;
-extern unsigned int cn_idx;
#ifdef CONFIG_DRBD_FAULT_INJECTION
extern int enable_faults;
@@ -86,34 +89,44 @@ extern char usermode_helper[];
*/
#define DRBD_SIGKILL SIGHUP
-/* All EEs on the free list should have ID_VACANT (== 0)
- * freshly allocated EEs get !ID_VACANT (== 1)
- * so if it says "cannot dereference null pointer at address 0x00000001",
- * it is most likely one of these :( */
-
#define ID_IN_SYNC (4711ULL)
#define ID_OUT_OF_SYNC (4712ULL)
-
#define ID_SYNCER (-1ULL)
-#define ID_VACANT 0
-#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
struct drbd_conf;
+struct drbd_tconn;
/* to shorten dev_warn(DEV, "msg"); and relatives statements */
#define DEV (disk_to_dev(mdev->vdisk))
+#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \
+ printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS)
+#define conn_alert(TCONN, FMT, ARGS...) conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS)
+#define conn_crit(TCONN, FMT, ARGS...) conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS)
+#define conn_err(TCONN, FMT, ARGS...) conn_printk(KERN_ERR, TCONN, FMT, ## ARGS)
+#define conn_warn(TCONN, FMT, ARGS...) conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS)
+#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS)
+#define conn_info(TCONN, FMT, ARGS...) conn_printk(KERN_INFO, TCONN, FMT, ## ARGS)
+#define conn_dbg(TCONN, FMT, ARGS...) conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS)
+
#define D_ASSERT(exp) if (!(exp)) \
dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
-#define ERR_IF(exp) if (({ \
- int _b = (exp) != 0; \
- if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \
- __func__, #exp, __FILE__, __LINE__); \
- _b; \
- }))
+/**
+ * expect - Make an assertion
+ *
+ * Unlike the assert macro, this macro returns a boolean result.
+ */
+#define expect(exp) ({ \
+ bool _bool = (exp); \
+ if (!_bool) \
+ dev_err(DEV, "ASSERTION %s FAILED in %s\n", \
+ #exp, __func__); \
+ _bool; \
+ })
/* Defines to control fault insertion */
enum {
@@ -150,15 +163,12 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
/* usual integer division */
#define div_floor(A, B) ((A)/(B))
-/* drbd_meta-data.c (still in drbd_main.c) */
-/* 4th incarnation of the disk layout. */
-#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
-
-extern struct drbd_conf **minor_table;
extern struct ratelimit_state drbd_ratelimit_state;
+extern struct idr minors; /* RCU, updates: genl_lock() */
+extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */
/* on the wire */
-enum drbd_packets {
+enum drbd_packet {
/* receiver (data socket) */
P_DATA = 0x00,
P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */
@@ -186,7 +196,7 @@ enum drbd_packets {
P_RECV_ACK = 0x15, /* Used in protocol B */
P_WRITE_ACK = 0x16, /* Used in protocol C */
P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
- P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */
+ P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */
P_NEG_ACK = 0x19, /* Sent if local disk is unusable */
P_NEG_DREPLY = 0x1a, /* Local disk is broken... */
P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */
@@ -207,77 +217,23 @@ enum drbd_packets {
P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
+ P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */
+ P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */
+ P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */
+ P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
- P_MAX_CMD = 0x2A,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
/* special command ids for handshake */
- P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */
- P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */
+ P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */
+ P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */
- P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */
+ P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! */
};
-static inline const char *cmdname(enum drbd_packets cmd)
-{
- /* THINK may need to become several global tables
- * when we want to support more than
- * one PRO_VERSION */
- static const char *cmdnames[] = {
- [P_DATA] = "Data",
- [P_DATA_REPLY] = "DataReply",
- [P_RS_DATA_REPLY] = "RSDataReply",
- [P_BARRIER] = "Barrier",
- [P_BITMAP] = "ReportBitMap",
- [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
- [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
- [P_UNPLUG_REMOTE] = "UnplugRemote",
- [P_DATA_REQUEST] = "DataRequest",
- [P_RS_DATA_REQUEST] = "RSDataRequest",
- [P_SYNC_PARAM] = "SyncParam",
- [P_SYNC_PARAM89] = "SyncParam89",
- [P_PROTOCOL] = "ReportProtocol",
- [P_UUIDS] = "ReportUUIDs",
- [P_SIZES] = "ReportSizes",
- [P_STATE] = "ReportState",
- [P_SYNC_UUID] = "ReportSyncUUID",
- [P_AUTH_CHALLENGE] = "AuthChallenge",
- [P_AUTH_RESPONSE] = "AuthResponse",
- [P_PING] = "Ping",
- [P_PING_ACK] = "PingAck",
- [P_RECV_ACK] = "RecvAck",
- [P_WRITE_ACK] = "WriteAck",
- [P_RS_WRITE_ACK] = "RSWriteAck",
- [P_DISCARD_ACK] = "DiscardAck",
- [P_NEG_ACK] = "NegAck",
- [P_NEG_DREPLY] = "NegDReply",
- [P_NEG_RS_DREPLY] = "NegRSDReply",
- [P_BARRIER_ACK] = "BarrierAck",
- [P_STATE_CHG_REQ] = "StateChgRequest",
- [P_STATE_CHG_REPLY] = "StateChgReply",
- [P_OV_REQUEST] = "OVRequest",
- [P_OV_REPLY] = "OVReply",
- [P_OV_RESULT] = "OVResult",
- [P_CSUM_RS_REQUEST] = "CsumRSRequest",
- [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
- [P_COMPRESSED_BITMAP] = "CBitmap",
- [P_DELAY_PROBE] = "DelayProbe",
- [P_OUT_OF_SYNC] = "OutOfSync",
- [P_MAX_CMD] = NULL,
- };
-
- if (cmd == P_HAND_SHAKE_M)
- return "HandShakeM";
- if (cmd == P_HAND_SHAKE_S)
- return "HandShakeS";
- if (cmd == P_HAND_SHAKE)
- return "HandShake";
- if (cmd >= P_MAX_CMD)
- return "Unknown";
- return cmdnames[cmd];
-}
+extern const char *cmdname(enum drbd_packet cmd);
/* for sending/receiving the bitmap,
* possibly in some encoding scheme */
@@ -337,37 +293,24 @@ struct p_header80 {
u32 magic;
u16 command;
u16 length; /* bytes of data after this header */
- u8 payload[0];
} __packed;
/* Header for big packets, Used for data packets exceeding 64kB */
struct p_header95 {
u16 magic; /* use DRBD_MAGIC_BIG here */
u16 command;
- u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */
- u8 payload[0];
+ u32 length;
} __packed;
-union p_header {
- struct p_header80 h80;
- struct p_header95 h95;
-};
-
-/*
- * short commands, packets without payload, plain p_header:
- * P_PING
- * P_PING_ACK
- * P_BECOME_SYNC_TARGET
- * P_BECOME_SYNC_SOURCE
- * P_UNPLUG_REMOTE
- */
+struct p_header100 {
+ u32 magic;
+ u16 volume;
+ u16 command;
+ u32 length;
+ u32 pad;
+} __packed;
-/*
- * commands with out-of-struct payload:
- * P_BITMAP (no additional fields)
- * P_DATA, P_DATA_REPLY (see p_data)
- * P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
- */
+extern unsigned int drbd_header_size(struct drbd_tconn *tconn);
/* these defines must not be changed without changing the protocol version */
#define DP_HARDBARRIER 1 /* depricated */
@@ -377,9 +320,10 @@ union p_header {
#define DP_FUA 16 /* equals REQ_FUA */
#define DP_FLUSH 32 /* equals REQ_FLUSH */
#define DP_DISCARD 64 /* equals REQ_DISCARD */
+#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
+#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
struct p_data {
- union p_header head;
u64 sector; /* 64 bits sector number */
u64 block_id; /* to identify the request in protocol B&C */
u32 seq_num;
@@ -390,21 +334,18 @@ struct p_data {
* commands which share a struct:
* p_block_ack:
* P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
- * P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ * P_SUPERSEDED (proto C, two-primaries conflict detection)
* p_block_req:
* P_DATA_REQUEST, P_RS_DATA_REQUEST
*/
struct p_block_ack {
- struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
u32 seq_num;
} __packed;
-
struct p_block_req {
- struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@@ -413,59 +354,52 @@ struct p_block_req {
/*
* commands with their own struct for additional fields:
- * P_HAND_SHAKE
+ * P_CONNECTION_FEATURES
* P_BARRIER
* P_BARRIER_ACK
* P_SYNC_PARAM
* ReportParams
*/
-struct p_handshake {
- struct p_header80 head; /* 8 bytes */
+struct p_connection_features {
u32 protocol_min;
u32 feature_flags;
u32 protocol_max;
/* should be more than enough for future enhancements
- * for now, feature_flags and the reserverd array shall be zero.
+ * for now, feature_flags and the reserved array shall be zero.
*/
u32 _pad;
- u64 reserverd[7];
+ u64 reserved[7];
} __packed;
-/* 80 bytes, FIXED for the next century */
struct p_barrier {
- struct p_header80 head;
u32 barrier; /* barrier number _handle_ only */
u32 pad; /* to multiple of 8 Byte */
} __packed;
struct p_barrier_ack {
- struct p_header80 head;
u32 barrier;
u32 set_size;
} __packed;
struct p_rs_param {
- struct p_header80 head;
- u32 rate;
+ u32 resync_rate;
/* Since protocol version 88 and higher. */
char verify_alg[0];
} __packed;
struct p_rs_param_89 {
- struct p_header80 head;
- u32 rate;
+ u32 resync_rate;
/* protocol version 89: */
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
} __packed;
struct p_rs_param_95 {
- struct p_header80 head;
- u32 rate;
+ u32 resync_rate;
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
u32 c_plan_ahead;
@@ -475,12 +409,11 @@ struct p_rs_param_95 {
} __packed;
enum drbd_conn_flags {
- CF_WANT_LOSE = 1,
+ CF_DISCARD_MY_DATA = 1,
CF_DRY_RUN = 2,
};
struct p_protocol {
- struct p_header80 head;
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
@@ -494,17 +427,14 @@ struct p_protocol {
} __packed;
struct p_uuids {
- struct p_header80 head;
u64 uuid[UI_EXTENDED_SIZE];
} __packed;
struct p_rs_uuid {
- struct p_header80 head;
u64 uuid;
} __packed;
struct p_sizes {
- struct p_header80 head;
u64 d_size; /* size of disk */
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
@@ -514,18 +444,15 @@ struct p_sizes {
} __packed;
struct p_state {
- struct p_header80 head;
u32 state;
} __packed;
struct p_req_state {
- struct p_header80 head;
u32 mask;
u32 val;
} __packed;
struct p_req_state_reply {
- struct p_header80 head;
u32 retcode;
} __packed;
@@ -539,15 +466,7 @@ struct p_drbd06_param {
u32 bit_map_gen[5];
} __packed;
-struct p_discard {
- struct p_header80 head;
- u64 block_id;
- u32 seq_num;
- u32 pad;
-} __packed;
-
struct p_block_desc {
- struct p_header80 head;
u64 sector;
u32 blksize;
u32 pad; /* to multiple of 8 Byte */
@@ -563,7 +482,6 @@ enum drbd_bitmap_code {
};
struct p_compressed_bm {
- struct p_header80 head;
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
* (encoding & 0x80): polarity (set/unset) of first runlength
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -575,90 +493,22 @@ struct p_compressed_bm {
} __packed;
struct p_delay_probe93 {
- struct p_header80 head;
u32 seq_num; /* sequence number to match the two probe packets */
u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
-/* DCBP: Drbd Compressed Bitmap Packet ... */
-static inline enum drbd_bitmap_code
-DCBP_get_code(struct p_compressed_bm *p)
-{
- return (enum drbd_bitmap_code)(p->encoding & 0x0f);
-}
-
-static inline void
-DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
-{
- BUG_ON(code & ~0xf);
- p->encoding = (p->encoding & ~0xf) | code;
-}
-
-static inline int
-DCBP_get_start(struct p_compressed_bm *p)
-{
- return (p->encoding & 0x80) != 0;
-}
-
-static inline void
-DCBP_set_start(struct p_compressed_bm *p, int set)
-{
- p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
-}
-
-static inline int
-DCBP_get_pad_bits(struct p_compressed_bm *p)
-{
- return (p->encoding >> 4) & 0x7;
-}
-
-static inline void
-DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
-{
- BUG_ON(n & ~0x7);
- p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
-}
-
-/* one bitmap packet, including the p_header,
- * should fit within one _architecture independend_ page.
- * so we need to use the fixed size 4KiB page size
- * most architectures have used for a long time.
+/*
+ * Bitmap packets need to fit within a single page on the sender and receiver,
+ * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
*/
-#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
-#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
-#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
-#if (PAGE_SIZE < 4096)
-/* drbd_send_bitmap / receive_bitmap would break horribly */
-#error "PAGE_SIZE too small"
-#endif
-
-union p_polymorph {
- union p_header header;
- struct p_handshake handshake;
- struct p_data data;
- struct p_block_ack block_ack;
- struct p_barrier barrier;
- struct p_barrier_ack barrier_ack;
- struct p_rs_param_89 rs_param_89;
- struct p_rs_param_95 rs_param_95;
- struct p_protocol protocol;
- struct p_sizes sizes;
- struct p_uuids uuids;
- struct p_state state;
- struct p_req_state req_state;
- struct p_req_state_reply req_state_reply;
- struct p_block_req block_req;
- struct p_delay_probe93 delay_probe93;
- struct p_rs_uuid rs_uuid;
- struct p_block_desc block_desc;
-} __packed;
+#define DRBD_SOCKET_BUFFER_SIZE 4096
/**********************************************************************/
enum drbd_thread_state {
- None,
- Running,
- Exiting,
- Restarting
+ NONE,
+ RUNNING,
+ EXITING,
+ RESTARTING
};
struct drbd_thread {
@@ -667,8 +517,9 @@ struct drbd_thread {
struct completion stop;
enum drbd_thread_state t_state;
int (*function) (struct drbd_thread *);
- struct drbd_conf *mdev;
+ struct drbd_tconn *tconn;
int reset_cpu_mask;
+ char name[9];
};
static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
@@ -681,58 +532,54 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
return thi->t_state;
}
-struct drbd_work;
-typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
struct drbd_work {
struct list_head list;
- drbd_work_cb cb;
+ int (*cb)(struct drbd_work *, int cancel);
+ union {
+ struct drbd_conf *mdev;
+ struct drbd_tconn *tconn;
+ };
};
-struct drbd_tl_epoch;
+#include "drbd_interval.h"
+
+extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *);
+
struct drbd_request {
struct drbd_work w;
- struct drbd_conf *mdev;
/* if local IO is not allowed, will be NULL.
* if local IO _is_ allowed, holds the locally submitted bio clone,
* or, after local IO completion, the ERR_PTR(error).
- * see drbd_endio_pri(). */
+ * see drbd_request_endio(). */
struct bio *private_bio;
- struct hlist_node collision;
- sector_t sector;
- unsigned int size;
- unsigned int epoch; /* barrier_nr */
+ struct drbd_interval i;
- /* barrier_nr: used to check on "completion" whether this req was in
+ /* epoch: used to check on "completion" whether this req was in
* the current epoch, and we therefore have to close it,
- * starting a new epoch...
+ * causing a p_barrier packet to be send, starting a new epoch.
+ *
+ * This corresponds to "barrier" in struct p_barrier[_ack],
+ * and to "barrier_nr" in struct drbd_epoch (and various
+ * comments/function parameters/local variable names).
*/
+ unsigned int epoch;
struct list_head tl_requests; /* ring list in the transfer log */
struct bio *master_bio; /* master bio pointer */
- unsigned long rq_state; /* see comments above _req_mod() */
unsigned long start_time;
-};
-struct drbd_tl_epoch {
- struct drbd_work w;
- struct list_head requests; /* requests before */
- struct drbd_tl_epoch *next; /* pointer to the next barrier */
- unsigned int br_number; /* the barriers identifier. */
- int n_writes; /* number of requests attached before this barrier */
-};
+ /* once it hits 0, we may complete the master_bio */
+ atomic_t completion_ref;
+ /* once it hits 0, we may destroy this drbd_request object */
+ struct kref kref;
-struct drbd_request;
-
-/* These Tl_epoch_entries may be in one of 6 lists:
- active_ee .. data packet being written
- sync_ee .. syncer block being written
- done_ee .. block written, need to send P_WRITE_ACK
- read_ee .. [RS]P_DATA_REQUEST being read
-*/
+ unsigned rq_state; /* see comments above _req_mod() */
+};
struct drbd_epoch {
+ struct drbd_tconn *tconn;
struct list_head list;
unsigned int barrier_nr;
atomic_t epoch_size; /* increased on every request added. */
@@ -762,17 +609,14 @@ struct digest_info {
void *digest;
};
-struct drbd_epoch_entry {
+struct drbd_peer_request {
struct drbd_work w;
- struct hlist_node collision;
struct drbd_epoch *epoch; /* for writes */
- struct drbd_conf *mdev;
struct page *pages;
atomic_t pending_bios;
- unsigned int size;
+ struct drbd_interval i;
/* see comments on ee flag bits below */
unsigned long flags;
- sector_t sector;
union {
u64 block_id;
struct digest_info *digest;
@@ -793,31 +637,37 @@ enum {
* we need to resubmit without the barrier flag. */
__EE_RESUBMITTED,
- /* we may have several bios per epoch entry.
+ /* we may have several bios per peer request.
* if any of those fail, we set this flag atomically
* from the endio callback */
__EE_WAS_ERROR,
/* This ee has a pointer to a digest instead of a block id */
__EE_HAS_DIGEST,
+
+ /* Conflicting local requests need to be restarted after this request */
+ __EE_RESTART_REQUESTS,
+
+ /* The peer wants a write ACK for this (wire proto C) */
+ __EE_SEND_WRITE_ACK,
+
+ /* Is set when net_conf had two_primaries set while creating this peer_req */
+ __EE_IN_INTERVAL_TREE,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
+#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
+#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK)
+#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
-/* global flag bits */
-enum drbd_flag {
- CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */
- SIGNAL_ASENDER, /* whether asender wants to be interrupted */
- SEND_PING, /* whether asender should send a ping asap */
-
+/* flag bits per mdev */
+enum {
UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
MD_DIRTY, /* current uuids and flags not yet on disk */
- DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */
USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */
- CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */
CL_ST_CHG_SUCCESS,
CL_ST_CHG_FAIL,
CRASHED_PRIMARY, /* This node was a crashed primary.
@@ -835,33 +685,14 @@ enum drbd_flag {
WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */
FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
- NET_CONGESTED, /* The data socket is congested */
-
- CONFIG_PENDING, /* serialization of (re)configuration requests.
- * if set, also prevents the device from dying */
- DEVICE_DYING, /* device became unconfigured,
- * but worker thread is still handling the cleanup.
- * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
- * while this is set. */
RESIZE_PENDING, /* Size change detected locally, waiting for the response from
* the peer, if it changed there as well. */
- CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
- GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
NEW_CUR_UUID, /* Create new current UUID when thawing IO */
AL_SUSPENDED, /* Activity logging is currently suspended. */
AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
- STATE_SENT, /* Do not change state/UUIDs while this is set */
-
- CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
- * pending, from drbd worker context.
- * If set, bdi_write_congested() returns true,
- * so shrink_page_list() would not recurse into,
- * and potentially deadlock on, this drbd worker.
- */
- DISCONNECT_SENT, /* Currently the last bit in this 32bit word */
-
- /* keep last */
- DRBD_N_FLAGS,
+ B_RS_H_DONE, /* Before resync handler done (already executed) */
+ DISCARD_MY_DATA, /* discard_my_data flag per volume */
+ READ_BALANCE_RR,
};
struct drbd_bitmap; /* opaque for drbd_conf */
@@ -899,18 +730,17 @@ enum bm_flag {
struct drbd_work_queue {
struct list_head q;
- struct semaphore s; /* producers up it, worker down()s it */
spinlock_t q_lock; /* to protect the list. */
+ wait_queue_head_t q_wait;
};
struct drbd_socket {
- struct drbd_work_queue work;
struct mutex mutex;
struct socket *socket;
/* this way we get our
* send/receive buffers off the stack */
- union p_polymorph sbuf;
- union p_polymorph rbuf;
+ void *sbuf;
+ void *rbuf;
};
struct drbd_md {
@@ -927,24 +757,16 @@ struct drbd_md {
s32 bm_offset; /* signed relative sector offset to bitmap */
/* u32 al_nr_extents; important for restoring the AL
- * is stored into sync_conf.al_extents, which in turn
+ * is stored into ldev->dc.al_extents, which in turn
* gets applied to act_log->nr_elements
*/
};
-/* for sync_conf and other types... */
-#define NL_PACKET(name, number, fields) struct name { fields };
-#define NL_INTEGER(pn,pr,member) int member;
-#define NL_INT64(pn,pr,member) __u64 member;
-#define NL_BIT(pn,pr,member) unsigned member:1;
-#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
-#include <linux/drbd_nl.h>
-
struct drbd_backing_dev {
struct block_device *backing_bdev;
struct block_device *md_bdev;
struct drbd_md md;
- struct disk_conf dc; /* The user provided config... */
+ struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */
sector_t known_size; /* last known size of that backing device */
};
@@ -968,17 +790,116 @@ enum write_ordering_e {
};
struct fifo_buffer {
- int *values;
unsigned int head_index;
unsigned int size;
+ int total; /* sum of all values */
+ int values[0];
+};
+extern struct fifo_buffer *fifo_alloc(int fifo_size);
+
+/* flag bits per tconn */
+enum {
+ NET_CONGESTED, /* The data socket is congested */
+ RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
+ SEND_PING, /* whether asender should send a ping asap */
+ SIGNAL_ASENDER, /* whether asender wants to be interrupted */
+ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
+ CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
+ CONN_WD_ST_CHG_OKAY,
+ CONN_WD_ST_CHG_FAIL,
+ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
+ CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */
+ STATE_SENT, /* Do not change state/UUIDs while this is set */
+ CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
+ * pending, from drbd worker context.
+ * If set, bdi_write_congested() returns true,
+ * so shrink_page_list() would not recurse into,
+ * and potentially deadlock on, this drbd worker.
+ */
+ DISCONNECT_SENT,
+};
+
+struct drbd_tconn { /* is a resource from the config file */
+ char *name; /* Resource name */
+ struct list_head all_tconn; /* linked on global drbd_tconns */
+ struct kref kref;
+ struct idr volumes; /* <tconn, vnr> to mdev mapping */
+ enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */
+ unsigned susp:1; /* IO suspended by user */
+ unsigned susp_nod:1; /* IO suspended because no data */
+ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
+ struct mutex cstate_mutex; /* Protects graceful disconnects */
+
+ unsigned long flags;
+ struct net_conf *net_conf; /* content protected by rcu */
+ struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */
+ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */
+ struct res_opts res_opts;
+
+ struct sockaddr_storage my_addr;
+ int my_addr_len;
+ struct sockaddr_storage peer_addr;
+ int peer_addr_len;
+
+ struct drbd_socket data; /* data/barrier/cstate/parameter packets */
+ struct drbd_socket meta; /* ping/ack (metadata) packets */
+ int agreed_pro_version; /* actually used protocol version */
+ unsigned long last_received; /* in jiffies, either socket */
+ unsigned int ko_count;
+
+ spinlock_t req_lock;
+
+ struct list_head transfer_log; /* all requests not yet fully processed */
+
+ struct crypto_hash *cram_hmac_tfm;
+ struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */
+ struct crypto_hash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */
+ struct crypto_hash *csums_tfm;
+ struct crypto_hash *verify_tfm;
+ void *int_dig_in;
+ void *int_dig_vv;
+
+ /* receiver side */
+ struct drbd_epoch *current_epoch;
+ spinlock_t epoch_lock;
+ unsigned int epochs;
+ enum write_ordering_e write_ordering;
+ atomic_t current_tle_nr; /* transfer log epoch number */
+ unsigned current_tle_writes; /* writes seen within this tl epoch */
+
+ unsigned long last_reconnect_jif;
+ struct drbd_thread receiver;
+ struct drbd_thread worker;
+ struct drbd_thread asender;
+ cpumask_var_t cpu_mask;
+
+ /* sender side */
+ struct drbd_work_queue sender_work;
+
+ struct {
+ /* whether this sender thread
+ * has processed a single write yet. */
+ bool seen_any_write_yet;
+
+ /* Which barrier number to send with the next P_BARRIER */
+ int current_epoch_nr;
+
+ /* how many write requests have been sent
+ * with req->epoch == current_epoch_nr.
+ * If none, no P_BARRIER will be sent. */
+ unsigned current_epoch_writes;
+ } send;
};
struct drbd_conf {
- unsigned long drbd_flags[(DRBD_N_FLAGS + BITS_PER_LONG -1)/BITS_PER_LONG];
+ struct drbd_tconn *tconn;
+ int vnr; /* volume number within the connection */
+ struct kref kref;
+
+ /* things that are stored as / read from meta data on disk */
+ unsigned long flags;
/* configured by drbdsetup */
- struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
- struct syncer_conf sync_conf;
struct drbd_backing_dev *ldev __protected_by(local);
sector_t p_size; /* partner's disk size */
@@ -986,11 +907,7 @@ struct drbd_conf {
struct block_device *this_bdev;
struct gendisk *vdisk;
- struct drbd_socket data; /* data/barrier/cstate/parameter packets */
- struct drbd_socket meta; /* ping/ack (metadata) packets */
- int agreed_pro_version; /* actually used protocol version */
- unsigned long last_received; /* in jiffies, either socket */
- unsigned int ko_count;
+ unsigned long last_reattach_jif;
struct drbd_work resync_work,
unplug_work,
go_diskless,
@@ -1010,10 +927,9 @@ struct drbd_conf {
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
- union drbd_state state;
+ union drbd_dev_state state;
wait_queue_head_t misc_wait;
wait_queue_head_t state_wait; /* upon each state change. */
- wait_queue_head_t net_cnt_wait;
unsigned int send_cnt;
unsigned int recv_cnt;
unsigned int read_cnt;
@@ -1023,17 +939,12 @@ struct drbd_conf {
atomic_t ap_bio_cnt; /* Requests we need to complete */
atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
- atomic_t unacked_cnt; /* Need to send replys for */
+ atomic_t unacked_cnt; /* Need to send replies for */
atomic_t local_cnt; /* Waiting for local completion */
- atomic_t net_cnt; /* Users of net_conf */
- spinlock_t req_lock;
- struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
- struct drbd_tl_epoch *newest_tle;
- struct drbd_tl_epoch *oldest_tle;
- struct list_head out_of_sequence_requests;
- struct list_head barrier_acked_requests;
- struct hlist_head *tl_hash;
- unsigned int tl_hash_s;
+
+ /* Interval tree of pending local requests */
+ struct rb_root read_requests;
+ struct rb_root write_requests;
/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
unsigned long rs_total;
@@ -1053,6 +964,7 @@ struct drbd_conf {
unsigned long rs_mark_time[DRBD_SYNC_MARKS];
/* current index into rs_mark_{left,time} */
int rs_last_mark;
+ unsigned long rs_last_bcast; /* [unit jiffies] */
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector;
@@ -1064,14 +976,7 @@ struct drbd_conf {
/* size of out-of-sync range in sectors. */
sector_t ov_last_oos_size;
unsigned long ov_left; /* in bits */
- struct crypto_hash *csums_tfm;
- struct crypto_hash *verify_tfm;
- unsigned long last_reattach_jif;
- unsigned long last_reconnect_jif;
- struct drbd_thread receiver;
- struct drbd_thread worker;
- struct drbd_thread asender;
struct drbd_bitmap *bitmap;
unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
@@ -1084,29 +989,19 @@ struct drbd_conf {
int open_cnt;
u64 *p_uuid;
- struct drbd_epoch *current_epoch;
- spinlock_t epoch_lock;
- unsigned int epochs;
- enum write_ordering_e write_ordering;
+
struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
- struct list_head done_ee; /* send ack */
- struct list_head read_ee; /* IO in progress (any read) */
+ struct list_head done_ee; /* need to send P_WRITE_ACK */
+ struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */
struct list_head net_ee; /* zero-copy network send in progress */
- struct hlist_head *ee_hash; /* is proteced by req_lock! */
- unsigned int ee_hash_s;
-
- /* this one is protected by ee_lock, single thread */
- struct drbd_epoch_entry *last_write_w_barrier;
int next_barrier_nr;
- struct hlist_head *app_reads_hash; /* is proteced by req_lock */
struct list_head resync_reads;
atomic_t pp_in_use; /* allocated from page pool */
atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */
- struct page *md_io_tmpp; /* for logical_block_size != 512 */
struct drbd_md_io md_io;
atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */
spinlock_t al_lock;
@@ -1115,22 +1010,16 @@ struct drbd_conf {
unsigned int al_tr_number;
int al_tr_cycle;
int al_tr_pos; /* position of the next transaction in the journal */
- struct crypto_hash *cram_hmac_tfm;
- struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
- struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
- void *int_dig_out;
- void *int_dig_in;
- void *int_dig_vv;
wait_queue_head_t seq_wait;
atomic_t packet_seq;
unsigned int peer_seq;
spinlock_t peer_seq_lock;
unsigned int minor;
unsigned long comm_bm_set; /* communicated number of set bits. */
- cpumask_var_t cpu_mask;
struct bm_io_work bm_io_work;
u64 ed_uuid; /* UUID of the exposed data */
- struct mutex state_mutex;
+ struct mutex own_state_mutex;
+ struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */
char congestion_reason; /* Why we where congested... */
atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
atomic_t rs_sect_ev; /* for submitted resync data rate, both */
@@ -1138,46 +1027,16 @@ struct drbd_conf {
int rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int c_sync_rate; /* current resync rate after syncer throttle magic */
- struct fifo_buffer rs_plan_s; /* correction values of resync planer */
+ struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
- int rs_planed; /* resync sectors already planned */
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
unsigned int peer_max_bio_size;
unsigned int local_max_bio_size;
};
-static inline void drbd_set_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
- set_bit(f, &mdev->drbd_flags[0]);
-}
-
-static inline void drbd_clear_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{