aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/drbd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_actlog.c41
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_int.h216
-rw-r--r--drivers/block/drbd/drbd_main.c593
-rw-r--r--drivers/block/drbd/drbd_nl.c270
-rw-r--r--drivers/block/drbd/drbd_proc.c34
-rw-r--r--drivers/block/drbd/drbd_receiver.c946
-rw-r--r--drivers/block/drbd/drbd_req.c165
-rw-r--r--drivers/block/drbd/drbd_req.h62
-rw-r--r--drivers/block/drbd/drbd_worker.c292
10 files changed, 1733 insertions, 888 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9400845d602..ac04ef97eac 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
* ok, (capacity & 7) != 0 sometimes, but who cares...
* we count rs_{total,left} in bits, not sectors.
*/
- spin_lock_irqsave(&mdev->al_lock, flags);
count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
- if (count) {
- /* we need the lock for drbd_try_clear_on_disk_bm */
- if (jiffies - mdev->rs_mark_time > HZ*10) {
- /* should be rolling marks,
- * but we estimate only anyways. */
- if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
+ if (count && get_ldev(mdev)) {
+ unsigned long now = jiffies;
+ unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+ int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+ if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+ unsigned long tw = drbd_bm_total_weight(mdev);
+ if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) {
- mdev->rs_mark_time = jiffies;
- mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+ mdev->rs_mark_time[next] = now;
+ mdev->rs_mark_left[next] = tw;
+ mdev->rs_last_mark = next;
}
}
- if (get_ldev(mdev)) {
- drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
- put_ldev(mdev);
- }
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+
/* just wake_up unconditional now, various lc_chaged(),
* lc_put() in drbd_try_clear_on_disk_bm(). */
wake_up = 1;
+ put_ldev(mdev);
}
- spin_unlock_irqrestore(&mdev->al_lock, flags);
if (wake_up)
wake_up(&mdev->al_wait);
}
@@ -1118,7 +1119,7 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
* @mdev: DRBD device.
* @sector: The sector number.
*
- * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
+ * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
*/
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
@@ -1129,10 +1130,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
sig = wait_event_interruptible(mdev->al_wait,
(bm_ext = _bme_get(mdev, enr)));
if (sig)
- return 0;
+ return -EINTR;
if (test_bit(BME_LOCKED, &bm_ext->flags))
- return 1;
+ return 0;
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(mdev->al_wait,
@@ -1145,13 +1146,11 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
wake_up(&mdev->al_wait);
}
spin_unlock_irq(&mdev->al_lock);
- return 0;
+ return -EINTR;
}
}
-
set_bit(BME_LOCKED, &bm_ext->flags);
-
- return 1;
+ return 0;
}
/**
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e3f88d6e141..fd42832f785 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*
* maybe bm_set should be atomic_t ?
*/
-static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long s;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 352441b0f92..c07c370c4c8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
* NOTE that the payload starts at a long aligned offset,
* regardless of 32 or 64 bit arch!
*/
-struct p_header {
+struct p_header80 {
u32 magic;
u16 command;
u16 length; /* bytes of data after this header */
u8 payload[0];
} __packed;
-/* 8 bytes. packet FIXED for the next century! */
+
+/* Header for big packets, Used for data packets exceeding 64kB */
+struct p_header95 {
+ u16 magic; /* use DRBD_MAGIC_BIG here */
+ u16 command;
+ u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */
+ u8 payload[0];
+} __packed;
+
+union p_header {
+ struct p_header80 h80;
+ struct p_header95 h95;
+};
/*
* short commands, packets without payload, plain p_header:
@@ -362,12 +374,16 @@ struct p_header {
*/
/* these defines must not be changed without changing the protocol version */
-#define DP_HARDBARRIER 1
-#define DP_RW_SYNC 2
+#define DP_HARDBARRIER 1 /* depricated */
+#define DP_RW_SYNC 2 /* equals REQ_SYNC */
#define DP_MAY_SET_IN_SYNC 4
+#define DP_UNPLUG 8 /* equals REQ_UNPLUG */
+#define DP_FUA 16 /* equals REQ_FUA */
+#define DP_FLUSH 32 /* equals REQ_FLUSH */
+#define DP_DISCARD 64 /* equals REQ_DISCARD */
struct p_data {
- struct p_header head;
+ union p_header head;
u64 sector; /* 64 bits sector number */
u64 block_id; /* to identify the request in protocol B&C */
u32 seq_num;
@@ -383,7 +399,7 @@ struct p_data {
* P_DATA_REQUEST, P_RS_DATA_REQUEST
*/
struct p_block_ack {
- struct p_header head;
+ struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@@ -392,7 +408,7 @@ struct p_block_ack {
struct p_block_req {
- struct p_header head;
+ struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@@ -409,7 +425,7 @@ struct p_block_req {
*/
struct p_handshake {
- struct p_header head; /* 8 bytes */
+ struct p_header80 head; /* 8 bytes */
u32 protocol_min;
u32 feature_flags;
u32 protocol_max;
@@ -424,19 +440,19 @@ struct p_handshake {
/* 80 bytes, FIXED for the next century */
struct p_barrier {
- struct p_header head;
+ struct p_header80 head;
u32 barrier; /* barrier number _handle_ only */
u32 pad; /* to multiple of 8 Byte */
} __packed;
struct p_barrier_ack {
- struct p_header head;
+ struct p_header80 head;
u32 barrier;
u32 set_size;
} __packed;
struct p_rs_param {
- struct p_header head;
+ struct p_header80 head;
u32 rate;
/* Since protocol version 88 and higher. */
@@ -444,20 +460,31 @@ struct p_rs_param {
} __packed;
struct p_rs_param_89 {
- struct p_header head;
+ struct p_header80 head;
u32 rate;
/* protocol version 89: */
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
} __packed;
+struct p_rs_param_95 {
+ struct p_header80 head;
+ u32 rate;
+ char verify_alg[SHARED_SECRET_MAX];
+ char csums_alg[SHARED_SECRET_MAX];
+ u32 c_plan_ahead;
+ u32 c_delay_target;
+ u32 c_fill_target;
+ u32 c_max_rate;
+} __packed;
+
enum drbd_conn_flags {
CF_WANT_LOSE = 1,
CF_DRY_RUN = 2,
};
struct p_protocol {
- struct p_header head;
+ struct p_header80 head;
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
@@ -471,17 +498,17 @@ struct p_protocol {
} __packed;
struct p_uuids {
- struct p_header head;
+ struct p_header80 head;
u64 uuid[UI_EXTENDED_SIZE];
} __packed;
struct p_rs_uuid {
- struct p_header head;
+ struct p_header80 head;
u64 uuid;
} __packed;
struct p_sizes {
- struct p_header head;
+ struct p_header80 head;
u64 d_size; /* size of disk */
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
@@ -491,18 +518,18 @@ struct p_sizes {
} __packed;
struct p_state {
- struct p_header head;
+ struct p_header80 head;
u32 state;
} __packed;
struct p_req_state {
- struct p_header head;
+ struct p_header80 head;
u32 mask;
u32 val;
} __packed;
struct p_req_state_reply {
- struct p_header head;
+ struct p_header80 head;
u32 retcode;
} __packed;
@@ -517,7 +544,7 @@ struct p_drbd06_param {
} __packed;
struct p_discard {
- struct p_header head;
+ struct p_header80 head;
u64 block_id;
u32 seq_num;
u32 pad;
@@ -533,7 +560,7 @@ enum drbd_bitmap_code {
};
struct p_compressed_bm {
- struct p_header head;
+ struct p_header80 head;
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
* (encoding & 0x80): polarity (set/unset) of first runlength
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -544,10 +571,10 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
-struct p_delay_probe {
- struct p_header head;
- u32 seq_num; /* sequence number to match the two probe packets */
- u32 offset; /* usecs the probe got sent after the reference time point */
+struct p_delay_probe93 {
+ struct p_header80 head;
+ u32 seq_num; /* sequence number to match the two probe packets */
+ u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
/* DCBP: Drbd Compressed Bitmap Packet ... */
@@ -594,7 +621,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
* so we need to use the fixed size 4KiB page size
* most architechtures have used for a long time.
*/
-#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
+#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
#if (PAGE_SIZE < 4096)
@@ -603,13 +630,14 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
#endif
union p_polymorph {
- struct p_header header;
+ union p_header header;
struct p_handshake handshake;
struct p_data data;
struct p_block_ack block_ack;
struct p_barrier barrier;
struct p_barrier_ack barrier_ack;
struct p_rs_param_89 rs_param_89;
+ struct p_rs_param_95 rs_param_95;
struct p_protocol protocol;
struct p_sizes sizes;
struct p_uuids uuids;
@@ -617,6 +645,8 @@ union p_polymorph {
struct p_req_state req_state;
struct p_req_state_reply req_state_reply;
struct p_block_req block_req;
+ struct p_delay_probe93 delay_probe93;
+ struct p_rs_uuid rs_uuid;
} __packed;
/**********************************************************************/
@@ -697,7 +727,7 @@ struct drbd_tl_epoch {
struct list_head requests; /* requests before */
struct drbd_tl_epoch *next; /* pointer to the next barrier */
unsigned int br_number; /* the barriers identifier. */
- int n_req; /* number of requests attached before this barrier */
+ int n_writes; /* number of requests attached before this barrier */
};
struct drbd_request;
@@ -747,7 +777,7 @@ struct digest_info {
struct drbd_epoch_entry {
struct drbd_work w;
struct hlist_node colision;
- struct drbd_epoch *epoch;
+ struct drbd_epoch *epoch; /* for writes */
struct drbd_conf *mdev;
struct page *pages;
atomic_t pending_bios;
@@ -755,7 +785,10 @@ struct drbd_epoch_entry {
/* see comments on ee flag bits below */
unsigned long flags;
sector_t sector;
- u64 block_id;
+ union {
+ u64 block_id;
+ struct digest_info *digest;
+ };
};
/* ee flag bits.
@@ -781,12 +814,16 @@ enum {
* if any of those fail, we set this flag atomically
* from the endio callback */
__EE_WAS_ERROR,
+
+ /* This ee has a pointer to a digest instead of a block id */
+ __EE_HAS_DIGEST,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
+#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
/* global flag bits */
enum {
@@ -794,7 +831,6 @@ enum {
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
SEND_PING, /* whether asender should send a ping asap */
- STOP_SYNC_TIMER, /* tell timer to cancel itself */
UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
MD_DIRTY, /* current uuids and flags not yet on disk */
@@ -816,6 +852,7 @@ enum {
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
+ GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@@ -829,6 +866,8 @@ enum {
* the peer, if it changed there as well. */
CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
+ NEW_CUR_UUID, /* Create new current UUID when thawing IO */
+ AL_SUSPENDED, /* Activity logging is currently suspended. */
};
struct drbd_bitmap; /* opaque for drbd_conf */
@@ -838,10 +877,6 @@ struct drbd_bitmap; /* opaque for drbd_conf */
/* THINK maybe we actually want to use the default "event/%s" worker threads
* or similar in linux 2.6, which uses per cpu data and threads.
- *
- * To be general, this might need a spin_lock member.
- * For now, please use the mdev->req_lock to protect list_head,
- * see drbd_queue_work below.
*/
struct drbd_work_queue {
struct list_head q;
@@ -915,6 +950,12 @@ enum write_ordering_e {
WO_bio_barrier
};
+struct fifo_buffer {
+ int *values;
+ unsigned int head_index;
+ unsigned int size;
+};
+
struct drbd_conf {
/* things that are stored as / read from meta data on disk */
unsigned long flags;
@@ -936,9 +977,16 @@ struct drbd_conf {
unsigned int ko_count;
struct drbd_work resync_work,
unplug_work,
+ go_diskless,
md_sync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
+#ifdef DRBD_DEBUG_MD_SYNC
+ struct {
+ unsigned int line;
+ const char* func;
+ } last_md_mark_dirty;
+#endif
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@@ -946,6 +994,7 @@ struct drbd_conf {
union drbd_state state;
wait_queue_head_t misc_wait;
wait_queue_head_t state_wait; /* upon each state change. */
+ wait_queue_head_t net_cnt_wait;
unsigned int send_cnt;
unsigned int recv_cnt;
unsigned int read_cnt;
@@ -974,12 +1023,16 @@ struct drbd_conf {
unsigned long rs_start;
/* cumulated time in PausedSyncX state [unit jiffies] */
unsigned long rs_paused;
+ /* skipped because csum was equal [unit BM_BLOCK_SIZE] */
+ unsigned long rs_same_csum;
+#define DRBD_SYNC_MARKS 8
+#define DRBD_SYNC_MARK_STEP (3*HZ)
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
- unsigned long rs_mark_left;
+ unsigned long rs_mark_left[DRBD_SYNC_MARKS];
/* marks's time [unit jiffies] */
- unsigned long rs_mark_time;
- /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */
- unsigned long rs_same_csum;
+ unsigned long rs_mark_time[DRBD_SYNC_MARKS];
+ /* current index into rs_mark_{left,time} */
+ int rs_last_mark;
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector;
@@ -1012,10 +1065,10 @@ struct drbd_conf {
spinlock_t epoch_lock;
unsigned int epochs;
enum write_ordering_e write_ordering;
- struct list_head active_ee; /* IO in progress */
- struct list_head sync_ee; /* IO in progress */
+ struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
+ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
struct list_head done_ee; /* send ack */
- struct list_head read_ee; /* IO in progress */
+ struct list_head read_ee; /* IO in progress (any read) */
struct list_head net_ee; /* zero-copy network send in progress */
struct hlist_head *ee_hash; /* is proteced by req_lock! */
unsigned int ee_hash_s;
@@ -1026,7 +1079,8 @@ struct drbd_conf {
int next_barrier_nr;
struct hlist_head *app_reads_hash; /* is proteced by req_lock */
struct list_head resync_reads;
- atomic_t pp_in_use;
+ atomic_t pp_in_use; /* allocated from page pool */
+ atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */
struct page *md_io_tmpp; /* for logical_block_size != 512 */
@@ -1054,6 +1108,15 @@ struct drbd_conf {
u64 ed_uuid; /* UUID of the exposed data */
struct mutex state_mutex;
char congestion_reason; /* Why we where congested... */
+ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
+ atomic_t rs_sect_ev; /* for submitted resync data rate, both */
+ int rs_last_sect_ev; /* counter to compare with */
+ int rs_last_events; /* counter of read or write "events" (unit sectors)
+ * on the lower level device when we last looked. */
+ int c_sync_rate; /* current resync rate after syncer throttle magic */
+ struct fifo_buffer rs_plan_s; /* correction values of resync planer */
+ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
+ int rs_planed; /* resync sectors already planed */
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1138,6 +1201,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
unsigned int set_size);
extern void tl_clear(struct drbd_conf *mdev);
+enum drbd_req_event;
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
extern void drbd_free_sock(struct drbd_conf *mdev);
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1150,12 +1215,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
- enum drbd_packets cmd, struct p_header *h,
+ enum drbd_packets cmd, struct p_header80 *h,
size_t size, unsigned msg_flags);
#define USE_DATA_SOCKET 1
#define USE_META_SOCKET 0
extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
- enum drbd_packets cmd, struct p_header *h,
+ enum drbd_packets cmd, struct p_header80 *h,
size_t size);
extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
char *data, size_t size);
@@ -1167,7 +1232,7 @@ extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_block_req *rp);
extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
- struct p_data *dp);
+ struct p_data *dp, int data_size);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
@@ -1201,7 +1266,13 @@ extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+#ifndef DRBD_DEBUG_MD_SYNC
extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+#else
+#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
+extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
+ unsigned int line, const char *func);
+#endif
extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
int (*io_fn)(struct drbd_conf *),
void (*done)(struct drbd_conf *, int),
@@ -1209,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+extern void drbd_go_diskless(struct drbd_conf *mdev);
/* Meta data layout
@@ -1264,6 +1336,8 @@ struct bm_extent {
* Bit 1 ==> local node thinks this block needs to be synced.
*/
+#define SLEEP_TIME (HZ/10)
+
#define BM_BLOCK_SHIFT 12 /* 4k per bit */
#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
@@ -1335,11 +1409,13 @@ struct bm_extent {
#endif
/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
- * With a value of 6 all IO in one 32K block make it to the same slot of the
+ * With a value of 8 all IO in one 128K block make it to the same slot of the
* hash table. */
-#define HT_SHIFT 6
+#define HT_SHIFT 8
#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
+
/* Number of elements in the app_reads_hash */
#define APP_R_HSIZE 15
@@ -1369,6 +1445,7 @@ extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_
/* bm_find_next variants for use while you hold drbd_bm_lock() */
extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
extern int drbd_bm_rs_done(struct drbd_conf *mdev);
/* for receive_bitmap */
@@ -1421,7 +1498,8 @@ extern void resync_after_online_grow(struct drbd_conf *);
extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
int force);
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
/* drbd_worker.c */
@@ -1467,10 +1545,12 @@ extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
+extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
/* drbd_receiver.c */
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
const unsigned rw, const int fault_type);
extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@@ -1479,7 +1559,10 @@ extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
sector_t sector,
unsigned int data_size,
gfp_t gfp_mask) __must_hold(local);
-extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e);
+extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+ int is_net);
+#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0)
+#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1)
extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
struct list_head *head);
extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
@@ -1487,6 +1570,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
extern void drbd_flush_workqueue(struct drbd_conf *mdev);
+extern void drbd_free_tl_hash(struct drbd_conf *mdev);
/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
* mess with get_fs/set_fs, we know we are KERNEL_DS always. */
@@ -1600,6 +1684,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
#define susp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
+#define susp_nod_MASK 1
+#define susp_fen_MASK 1
#define NS(T, S) \
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
@@ -1856,13 +1942,6 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
}
static inline void
-_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
-{
- list_add_tail(&w->list, &q->q);
- up(&q->s);
-}
-
-static inline void
drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
{
unsigned long flags;
@@ -1899,19 +1978,19 @@ static inline void request_ping(struct drbd_conf *mdev)
static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
enum drbd_packets cmd)
{
- struct p_header h;
+ struct p_header80 h;
return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
}
static inline int drbd_send_ping(struct drbd_conf *mdev)
{
- struct p_header h;
+ struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
}
static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
{
- struct p_header h;
+ struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
}
@@ -2013,7 +2092,7 @@ static inline void inc_unacked(struct drbd_conf *mdev)
static inline void put_net_conf(struct drbd_conf *mdev)
{
if (atomic_dec_and_test(&mdev->net_cnt))
- wake_up(&mdev->misc_wait);
+ wake_up(&mdev->net_cnt_wait);
}
/**
@@ -2044,10 +2123,14 @@ static inline int get_net_conf(struct drbd_conf *mdev)
static inline void put_ldev(struct drbd_conf *mdev)
{
+ int i = atomic_dec_return(&mdev->local_cnt);
__release(local);
- if (atomic_dec_and_test(&mdev->local_cnt))
+ D_ASSERT(i >= 0);
+ if (i == 0) {
+ if (mdev->state.disk == D_FAILED)
+ drbd_go_diskless(mdev);
wake_up(&mdev->misc_wait);
- D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
+ }
}
#ifndef __CHECKER__
@@ -2179,11 +2262,16 @@ static inline int drbd_state_is_stable(union drbd_state s)
return 1;
}
+static inline int is_susp(union drbd_state s)
+{
+ return s.susp || s.susp_nod || s.susp_fen;
+}
+
static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
{
int mxb = drbd_get_max_buffers(mdev);
- if (mdev->state.susp)
+ if (is_susp(mdev->state))
return 0;
if (test_bit(SUSPEND_IO, &mdev->flags))
return 0;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e4b56119866..c5dfe6486cf 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -78,6 +78,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
@@ -200,7 +201,7 @@ static int tl_init(struct drbd_conf *mdev)
INIT_LIST_HEAD(&b->w.list);
b->next = NULL;
b->br_number = 4711;
- b->n_req = 0;
+ b->n_writes = 0;
b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
mdev->oldest_tle = b;
@@ -241,7 +242,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
INIT_LIST_HEAD(&new->w.list);
new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
new->next = NULL;
- new->n_req = 0;
+ new->n_writes = 0;
newest_before = mdev->newest_tle;
/* never send a barrier number == 0, because that is special-cased
@@ -285,9 +286,9 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
barrier_nr, b->br_number);
goto bail;
}
- if (b->n_req != set_size) {
- dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n",
- barrier_nr, set_size, b->n_req);
+ if (b->n_writes != set_size) {
+ dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
+ barrier_nr, set_size, b->n_writes);
goto bail;
}
@@ -334,6 +335,82 @@ bail:
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}
+/**
+ * _tl_restart() - Walks the transfer log, and applies an action to all requests
+ * @mdev: DRBD device.
+ * @what: The action/event to perform with all request objects
+ *
+ * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
+ * restart_frozen_disk_io.
+ */
+static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+ struct drbd_tl_epoch *b, *tmp, **pn;
+ struct list_head *le, *tle, carry_reads;
+ struct drbd_request *req;
+ int rv, n_writes, n_reads;
+
+ b = mdev->oldest_tle;
+ pn = &mdev->oldest_tle;
+ while (b) {
+ n_writes = 0;
+ n_reads = 0;
+ INIT_LIST_HEAD(&carry_reads);
+ list_for_each_safe(le, tle, &b->requests) {
+ req = list_entry(le, struct drbd_request, tl_requests);
+ rv = _req_mod(req, what);
+
+ n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
+ n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
+ }
+ tmp = b->next;
+
+ if (n_writes) {
+ if (what == resend) {
+ b->n_writes = n_writes;
+ if (b->w.cb == NULL) {
+ b->w.cb = w_send_barrier;
+ inc_ap_pending(mdev);
+ set_bit(CREATE_BARRIER, &mdev->flags);
+ }
+
+ drbd_queue_work(&mdev->data.work, &b->w);
+ }
+ pn = &b->next;
+ } else {
+ if (n_reads)
+ list_add(&carry_reads, &b->requests);
+ /* there could still be requests on that ring list,
+ * in case local io is still pending */
+ list_del(&b->requests);
+
+ /* dec_ap_pending corresponding to queue_barrier.
+ * the newest barrier may not have been queued yet,
+ * in which case w.cb is still NULL. */
+ if (b->w.cb != NULL)
+ dec_ap_pending(mdev);
+
+ if (b == mdev->newest_tle) {
+ /* recycle, but reinit! */
+ D_ASSERT(tmp == NULL);
+ INIT_LIST_HEAD(&b->requests);
+ list_splice(&carry_reads, &b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->w.cb = NULL;
+ b->br_number = net_random();
+ b->n_writes = 0;
+
+ *pn = b;
+ break;
+ }
+ *pn = tmp;
+ kfree(b);
+ }
+ b = tmp;
+ list_splice(&carry_reads, &b->requests);
+ }
+}
+
/**
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -345,48 +422,12 @@ bail:
*/
void tl_clear(struct drbd_conf *mdev)
{
- struct drbd_tl_epoch *b, *tmp;
struct list_head *le, *tle;
struct drbd_request *r;
- int new_initial_bnr = net_random();
spin_lock_irq(&mdev->req_lock);
- b = mdev->oldest_tle;
- while (b) {
- list_for_each_safe(le, tle, &b->requests) {
- r = list_entry(le, struct drbd_request, tl_requests);
- /* It would be nice to complete outside of spinlock.
- * But this is easier for now. */
- _req_mod(r, connection_lost_while_pending);
- }
- tmp = b->next;
-
- /* there could still be requests on that ring list,
- * in case local io is still pending */
- list_del(&b->requests);
-
- /* dec_ap_pending corresponding to queue_barrier.
- * the newest barrier may not have been queued yet,
- * in which case w.cb is still NULL. */
- if (b->w.cb != NULL)
- dec_ap_pending(mdev);
-
- if (b == mdev->newest_tle) {
- /* recycle, but reinit! */
- D_ASSERT(tmp == NULL);
- INIT_LIST_HEAD(&b->requests);
- INIT_LIST_HEAD(&b->w.list);
- b->w.cb = NULL;
- b->br_number = new_initial_bnr;
- b->n_req = 0;
-
- mdev->oldest_tle = b;
- break;
- }
- kfree(b);
- b = tmp;
- }
+ _tl_restart(mdev, connection_lost_while_pending);
/* we expect this list to be empty. */
D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
@@ -402,6 +443,15 @@ void tl_clear(struct drbd_conf *mdev)
/* ensure bit indicating barrier is required is clear */
clear_bit(CREATE_BARRIER, &mdev->flags);
+ memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
+
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+ spin_lock_irq(&mdev->req_lock);
+ _tl_restart(mdev, what);
spin_unlock_irq(&mdev->req_lock);
}
@@ -456,7 +506,7 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
static int is_valid_state_transition(struct drbd_conf *,
union drbd_state, union drbd_state);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, int *warn_sync_abort);
+ union drbd_state ns, const char **warn_sync_abort);
int drbd_send_state_req(struct drbd_conf *,
union drbd_state, union drbd_state);
@@ -606,7 +656,7 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
drbd_role_str(ns.peer),
drbd_disk_str(ns.disk),
drbd_disk_str(ns.pdsk),
- ns.susp ? 's' : 'r',
+ is_susp(ns) ? 's' : 'r',
ns.aftr_isp ? 'a' : '-',
ns.peer_isp ? 'p' : '-',
ns.user_isp ? 'u' : '-'
@@ -764,7 +814,7 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
* to D_UNKNOWN. This rule and many more along those lines are in this function.
*/
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, int *warn_sync_abort)
+ union drbd_state ns, const char **warn_sync_abort)
{
enum drbd_fencing_p fp;
@@ -779,9 +829,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state