aboutsummaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig22
-rw-r--r--drivers/block/brd.c53
-rw-r--r--drivers/block/cciss_scsi.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c21
-rw-r--r--drivers/block/drbd/drbd_int.h149
-rw-r--r--drivers/block/drbd/drbd_main.c154
-rw-r--r--drivers/block/drbd/drbd_nl.c52
-rw-r--r--drivers/block/drbd/drbd_proc.c19
-rw-r--r--drivers/block/drbd/drbd_receiver.c689
-rw-r--r--drivers/block/drbd/drbd_req.c94
-rw-r--r--drivers/block/drbd/drbd_req.h1
-rw-r--r--drivers/block/drbd/drbd_strings.c2
-rw-r--r--drivers/block/drbd/drbd_worker.c230
-rw-r--r--drivers/block/drbd/drbd_wrappers.h16
-rw-r--r--drivers/block/loop.c4
-rw-r--r--drivers/block/swim3.c8
-rw-r--r--drivers/block/virtio_blk.c50
-rw-r--r--drivers/block/xsysace.c13
18 files changed, 1040 insertions, 539 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 77bfce52e9c..de277689da6 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -76,6 +76,17 @@ config BLK_DEV_XD
It's pretty unlikely that you have one of these: say N.
+config GDROM
+ tristate "SEGA Dreamcast GD-ROM drive"
+ depends on SH_DREAMCAST
+ help
+ A standard SEGA Dreamcast comes with a modified CD ROM drive called a
+ "GD-ROM" by SEGA to signify it is capable of reading special disks
+ with up to 1 GB of data. This drive will also read standard CD ROM
+ disks. Select this option to access any disks in your GD ROM drive.
+ Most users will want to say "Y" here.
+ You can also build this as a module which will be called gdrom.
+
config PARIDE
tristate "Parallel port IDE device support"
depends on PARPORT_PC
@@ -103,17 +114,6 @@ config PARIDE
"MicroSolutions backpack protocol", "DataStor Commuter protocol"
etc.).
-config GDROM
- tristate "SEGA Dreamcast GD-ROM drive"
- depends on SH_DREAMCAST
- help
- A standard SEGA Dreamcast comes with a modified CD ROM drive called a
- "GD-ROM" by SEGA to signify it is capable of reading special disks
- with up to 1 GB of data. This drive will also read standard CD ROM
- disks. Select this option to access any disks in your GD ROM drive.
- Most users will want to say "Y" here.
- You can also build this as a module which will be called gdrom.
-
source "drivers/block/paride/Kconfig"
config BLK_CPQ_DA
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6081e81d573..f1bf79d9bc0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
return page;
}
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+ struct page *page;
+ pgoff_t idx;
+
+ spin_lock(&brd->brd_lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ page = radix_tree_delete(&brd->brd_pages, idx);
+ spin_unlock(&brd->brd_lock);
+ if (page)
+ __free_page(page);
+}
+
+static void brd_zero_page(struct brd_device *brd, sector_t sector)
+{
+ struct page *page;
+
+ page = brd_lookup_page(brd, sector);
+ if (page)
+ clear_highpage(page);
+}
+
/*
* Free all backing store pages and radix tree. This must only be called when
* there are no other users of the device.
@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
return 0;
}
+static void discard_from_brd(struct brd_device *brd,
+ sector_t sector, size_t n)
+{
+ while (n >= PAGE_SIZE) {
+ /*
+ * Don't want to actually discard pages here because
+ * re-allocating the pages can result in writeback
+ * deadlocks under heavy load.
+ */
+ if (0)
+ brd_free_page(brd, sector);
+ else
+ brd_zero_page(brd, sector);
+ sector += PAGE_SIZE >> SECTOR_SHIFT;
+ n -= PAGE_SIZE;
+ }
+}
+
/*
* Copy n bytes from src to the brd starting at sector. Does not sleep.
*/
@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
get_capacity(bdev->bd_disk))
goto out;
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+ err = 0;
+ discard_from_brd(brd, sector, bio->bi_size);
+ goto out;
+ }
+
rw = bio_rw(bio);
if (rw == READA)
rw = READ;
@@ -320,7 +366,7 @@ out:
}
#ifdef CONFIG_BLK_DEV_XIP
-static int brd_direct_access (struct block_device *bdev, sector_t sector,
+static int brd_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, unsigned long *pfn)
{
struct brd_device *brd = bdev->bd_disk->private_data;
@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
+ brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
+ brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+ brd->brd_queue->limits.discard_zeroes_data = 1;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
+
disk = brd->brd_disk = alloc_disk(1 << part_shift);
if (!disk)
goto out_free_queue;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1d0e2cfec7..3381505c8a6 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
sa = h->scsi_ctlr;
stk = &sa->cmd_stack;
+ stk->top++;
if (stk->top >= CMD_STACK_SIZE) {
printk("cciss: scsi_cmd_free called too many times.\n");
BUG();
}
- stk->top++;
stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3390716898d..e3f88d6e141 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -84,6 +84,9 @@ struct drbd_bitmap {
#define BM_MD_IO_ERROR 1
#define BM_P_VMALLOCED 2
+static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+ unsigned long e, int val, const enum km_type km);
+
static int bm_is_locked(struct drbd_bitmap *b)
{
return test_bit(BM_LOCKED, &b->bm_flags);
@@ -441,7 +444,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
* In case this is actually a resize, we copy the old bitmap into the new one.
* Otherwise, the bitmap is initialized to all bits set.
*/
-int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
+int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long bits, words, owords, obits, *p_addr, *bm;
@@ -516,7 +519,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
obits = b->bm_bits;
growing = bits > obits;
- if (opages)
+ if (opages && growing && set_new_bits)
bm_set_surplus(b);
b->bm_pages = npages;
@@ -526,8 +529,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
b->bm_dev_capacity = capacity;
if (growing) {
- bm_memset(b, owords, 0xff, words-owords);
- b->bm_set += bits - obits;
+ if (set_new_bits) {
+ bm_memset(b, owords, 0xff, words-owords);
+ b->bm_set += bits - obits;
+ } else
+ bm_memset(b, owords, 0x00, words-owords);
+
}
if (want < have) {
@@ -773,7 +780,7 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int
/* nothing to do, on disk == in memory */
# define bm_cpu_to_lel(x) ((void)0)
# else
-void bm_cpu_to_lel(struct drbd_bitmap *b)
+static void bm_cpu_to_lel(struct drbd_bitmap *b)
{
/* need to cpu_to_lel all the pages ...
* this may be optimized by using
@@ -1015,7 +1022,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
* wants bitnr, not sector.
* expected to be called for only a few bits (e - s about BITS_PER_LONG).
* Must hold bitmap lock already. */
-int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned long e, int val, const enum km_type km)
{
struct drbd_bitmap *b = mdev->bitmap;
@@ -1053,7 +1060,7 @@ int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
* for val != 0, we change 0 -> 1, return code positive
* for val == 0, we change 1 -> 0, return code negative
* wants bitnr, not sector */
-int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
const unsigned long e, int val)
{
unsigned long flags;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e5e86a78182..485ed8c7d62 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -132,6 +132,7 @@ enum {
DRBD_FAULT_DT_RA = 6, /* data read ahead */
DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */
DRBD_FAULT_AL_EE = 8, /* alloc ee */
+ DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */
DRBD_FAULT_MAX,
};
@@ -208,8 +209,11 @@ enum drbd_packets {
P_RS_IS_IN_SYNC = 0x22, /* meta socket */
P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */
+ /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
+ /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
+ P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
- P_MAX_CMD = 0x25,
+ P_MAX_CMD = 0x28,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
@@ -264,6 +268,7 @@ static inline const char *cmdname(enum drbd_packets cmd)
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
[P_COMPRESSED_BITMAP] = "CBitmap",
+ [P_DELAY_PROBE] = "DelayProbe",
[P_MAX_CMD] = NULL,
};
@@ -481,7 +486,8 @@ struct p_sizes {
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
u32 max_segment_size; /* Maximal size of a BIO */
- u32 queue_order_type;
+ u16 queue_order_type; /* not yet implemented in DRBD*/
+ u16 dds_flags; /* use enum dds_flags here. */
} __packed;
struct p_state {
@@ -538,6 +544,18 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
+struct p_delay_probe {
+ struct p_header head;
+ u32 seq_num; /* sequence number to match the two probe packets */
+ u32 offset; /* usecs the probe got sent after the reference time point */
+} __packed;
+
+struct delay_probe {
+ struct list_head list;
+ unsigned int seq_num;
+ struct timeval time;
+};
+
/* DCBP: Drbd Compressed Bitmap Packet ... */
static inline enum drbd_bitmap_code
DCBP_get_code(struct p_compressed_bm *p)
@@ -722,22 +740,6 @@ enum epoch_event {
EV_CLEANUP = 32, /* used as flag */
};
-struct drbd_epoch_entry {
- struct drbd_work w;
- struct drbd_conf *mdev;
- struct bio *private_bio;
- struct hlist_node colision;
- sector_t sector;
- unsigned int size;
- struct drbd_epoch *epoch;
-
- /* up to here, the struct layout is identical to drbd_request;
- * we might be able to use that to our advantage... */
-
- unsigned int flags;
- u64 block_id;
-};
-
struct drbd_wq_barrier {
struct drbd_work w;
struct completion done;
@@ -748,17 +750,49 @@ struct digest_info {
void *digest;
};
-/* ee flag bits */
+struct drbd_epoch_entry {
+ struct drbd_work w;
+ struct hlist_node colision;
+ struct drbd_epoch *epoch;
+ struct drbd_conf *mdev;
+ struct page *pages;
+ atomic_t pending_bios;
+ unsigned int size;
+ /* see comments on ee flag bits below */
+ unsigned long flags;
+ sector_t sector;
+ u64 block_id;
+};
+
+/* ee flag bits.
+ * While corresponding bios are in flight, the only modification will be
+ * set_bit WAS_ERROR, which has to be atomic.
+ * If no bios are in flight yet, or all have been completed,
+ * non-atomic modification to ee->flags is ok.
+ */
enum {
__EE_CALL_AL_COMPLETE_IO,
- __EE_CONFLICT_PENDING,
__EE_MAY_SET_IN_SYNC,
+
+ /* This epoch entry closes an epoch using a barrier.
+ * On sucessful completion, the epoch is released,
+ * and the P_BARRIER_ACK send. */
__EE_IS_BARRIER,
+
+ /* In case a barrier failed,
+ * we need to resubmit without the barrier flag. */
+ __EE_RESUBMITTED,
+
+ /* we may have several bios per epoch entry.
+ * if any of those fail, we set this flag atomically
+ * from the endio callback */
+ __EE_WAS_ERROR,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
-#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
+#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
+#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
/* global flag bits */
enum {
@@ -908,9 +942,11 @@ struct drbd_conf {
unsigned int ko_count;
struct drbd_work resync_work,
unplug_work,
- md_sync_work;
+ md_sync_work,
+ delay_probe_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
+ struct timer_list delay_probe_timer;
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@@ -1026,6 +1062,12 @@ struct drbd_conf {
u64 ed_uuid; /* UUID of the exposed data */
struct mutex state_mutex;
char congestion_reason; /* Why we where congested... */
+ struct list_head delay_probes; /* protected by peer_seq_lock */
+ int data_delay; /* Delay of packets on the data-sock behind meta-sock */
+ unsigned int delay_seq; /* To generate sequence numbers of delay probes */
+ struct timeval dps_time; /* delay-probes-start-time */
+ unsigned int dp_volume_last; /* send_cnt of last delay probe */
+ int c_sync_rate; /* current resync rate after delay_probe magic */
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1081,6 +1123,11 @@ enum chg_state_flags {
CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
};
+enum dds_flags {
+ DDSF_FORCED = 1,
+ DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
+};
+
extern void drbd_init_set_defaults(struct drbd_conf *mdev);
extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
union drbd_state mask, union drbd_state val);
@@ -1113,7 +1160,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev);
extern int drbd_send_uuids(struct drbd_conf *mdev);
extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
-extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply);
+extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
@@ -1311,7 +1358,7 @@ struct bm_extent {
#define APP_R_HSIZE 15
extern int drbd_bm_init(struct drbd_conf *mdev);
-extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors);
+extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits);
extern void drbd_bm_cleanup(struct drbd_conf *mdev);
extern void drbd_bm_set_all(struct drbd_conf *mdev);
extern void drbd_bm_clear_all(struct drbd_conf *mdev);
@@ -1383,7 +1430,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
extern char *ppsize(char *buf, unsigned long long size);
extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local);
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
extern void resync_after_online_grow(struct drbd_conf *);
extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
@@ -1414,7 +1461,8 @@ static inline void ov_oos_print(struct drbd_conf *mdev)
}
-extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
+extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
+extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *);
/* worker callbacks */
extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
@@ -1426,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
-extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
@@ -1438,6 +1485,8 @@ extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
/* drbd_receiver.c */
+extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+ const unsigned rw, const int fault_type);
extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
u64 id,
@@ -1490,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
static inline void drbd_tcp_quickack(struct socket *sock)
{
- int __user val = 1;
+ int __user val = 2;
(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
(char __user *)&val, sizeof(val));
}
@@ -1593,6 +1642,41 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
* inline helper functions
*************************/
+/* see also page_chain_add and friends in drbd_receiver.c */
+static inline struct page *page_chain_next(struct page *page)
+{
+ return (struct page *)page_private(page);
+}
+#define page_chain_for_each(page) \
+ for (; page && ({ prefetch(page_chain_next(page)); 1; }); \
+ page = page_chain_next(page))
+#define page_chain_for_each_safe(page, n) \
+ for (; page && ({ n = page_chain_next(page); 1; }); page = n)
+
+static inline int drbd_bio_has_active_page(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (page_count(bvec->bv_page) > 1)
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
+{
+ struct page *page = e->pages;
+ page_chain_for_each(page) {
+ if (page_count(page) > 1)
+ return 1;
+ }
+ return 0;
+}
+
+
static inline void drbd_state_lock(struct drbd_conf *mdev)
{
wait_event(mdev->misc_wait,
@@ -1641,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
if (!forcedetach) {
- if (printk_ratelimit())
+ if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s."
"Passing error on...\n", where);
break;
@@ -2138,7 +2222,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
/* I'd like to use wait_event_lock_irq,
* but I'm not sure when it got introduced,
* and not sure when it has 3 or 4 arguments */
-static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
+static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
{
/* compare with after_state_ch,
* os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
@@ -2160,7 +2244,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
finish_wait(&mdev->misc_wait, &wait);
spin_lock_irq(&mdev->req_lock);
}
- atomic_add(one_or_two, &mdev->ap_bio_cnt);
+ atomic_add(count, &mdev->ap_bio_cnt);
spin_unlock_irq(&mdev->req_lock);
}
@@ -2251,7 +2335,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
if (test_bit(MD_NO_BARRIER, &mdev->flags))
return;
- r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (r) {
set_bit(MD_NO_BARRIER, &mdev->flags);
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 93d1f9b469d..6b077f93acc 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
rv = SS_NO_REMOTE_DISK;
+ else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
else if ((ns.conn == C_CONNECTED ||
ns.conn == C_WF_BITMAP_S ||
ns.conn == C_SYNC_SOURCE ||
@@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
break;
case C_WF_BITMAP_S:
case C_PAUSED_SYNC_S:
- ns.pdsk = D_OUTDATED;
+ /* remap any consistent state to D_OUTDATED,
+ * but disallow "upgrade" of not even consistent states.
+ */
+ ns.pdsk =
+ (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
+ ? os.pdsk : D_OUTDATED;
break;
case C_SYNC_SOURCE:
ns.pdsk = D_INCONSISTENT;
@@ -1205,8 +1213,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
&& (ns.pdsk < D_INCONSISTENT ||
ns.pdsk == D_UNKNOWN ||
ns.pdsk == D_OUTDATED)) {
- kfree(mdev->p_uuid);
- mdev->p_uuid = NULL;
if (get_ldev(mdev)) {
if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
@@ -1232,7 +1238,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
- drbd_send_sizes(mdev, 0); /* to start sync... */
+ drbd_send_sizes(mdev, 0, 0); /* to start sync... */
drbd_send_uuids(mdev);
drbd_send_state(mdev);
}
@@ -1755,7 +1761,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
(struct p_header *)&p, sizeof(p));
}
-int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
struct p_sizes p;
sector_t d_size, u_size;
@@ -1767,7 +1773,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
d_size = drbd_get_max_capacity(mdev->ldev);
u_size = mdev->ldev->dc.disk_size;
q_order_type = drbd_queue_order_type(mdev);
- p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
put_ldev(mdev);
} else {
d_size = 0;
@@ -1779,7 +1784,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
p.u_size = cpu_to_be64(u_size);
p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
- p.queue_order_type = cpu_to_be32(q_order_type);
+ p.queue_order_type = cpu_to_be16(q_order_type);
+ p.dds_flags = cpu_to_be16(flags);
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
(struct p_header *)&p, sizeof(p));
@@ -2180,6 +2186,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
return ok;
}
+static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
+{
+ struct p_delay_probe dp;
+ int offset, ok = 0;
+ struct timeval now;
+
+ mutex_lock(&ds->mutex);
+ if (likely(ds->socket)) {
+ do_gettimeofday(&now);
+ offset = now.tv_usec - mdev->dps_time.tv_usec +
+ (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
+ dp.seq_num = cpu_to_be32(mdev->delay_seq);
+ dp.offset = cpu_to_be32(offset);
+
+ ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
+ (struct p_header *)&dp, sizeof(dp), 0);
+ }
+ mutex_unlock(&ds->mutex);
+
+ return ok;
+}
+
+static int drbd_send_delay_probes(struct drbd_conf *mdev)
+{
+ int ok;
+
+ mdev->delay_seq++;
+ do_gettimeofday(&mdev->dps_time);
+ ok = drbd_send_delay_probe(mdev, &mdev->meta);
+ ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
+
+ mdev->dp_volume_last = mdev->send_cnt;
+ mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
+
+ return ok;
+}
+
/* called on sndtimeo
* returns FALSE if we should retry,
* TRUE if we think connection is dead
@@ -2229,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
* with page_count == 0 or PageSlab.
*/
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
- int offset, size_t size)
+ int offset, size_t size, unsigned msg_flags)
{
- int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+ int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
kunmap(page);
if (sent == size)
mdev->send_cnt += size>>9;
@@ -2239,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
}
static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
- int offset, size_t size)
+ int offset, size_t size, unsigned msg_flags)
{
mm_segment_t oldfs = get_fs();
int sent, ok;
@@ -2252,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
* __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */
if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
- return _drbd_no_send_page(mdev, page, offset, size);
+ return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
+ msg_flags |= MSG_NOSIGNAL;
drbd_update_congested(mdev);
set_fs(KERNEL_DS);
do {
sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
offset, len,
- MSG_NOSIGNAL);
+ msg_flags);
if (sent == -EAGAIN) {
if (we_should_drop_the_connection(mdev,
mdev->data.socket))
@@ -2288,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ /* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_no_send_page(mdev, bvec->bv_page,
- bvec->bv_offset, bvec->bv_len))
+ bvec->bv_offset, bvec->bv_len,
+ i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
return 0;
}
return 1;
@@ -2300,15 +2346,56 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ /* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_send_page(mdev, bvec->bv_page,
- bvec->bv_offset, bvec->bv_len))
+ bvec->bv_offset, bvec->bv_len,
+ i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
+ return 0;
+ }
+ return 1;
+}
+
+static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+ struct page *page = e->pages;
+ unsigned len = e->size;
+ /* hint all but last page with MSG_MORE */
+ page_chain_for_each(page) {
+ unsigned l = min_t(unsigned, len, PAGE_SIZE);
+ if (!_drbd_send_page(mdev, page, 0, l,
+ page_chain_next(page) ? MSG_MORE : 0))
return 0;
+ len -= l;
}
+ return 1;
+}
+
+static void consider_delay_probes(struct drbd_conf *mdev)
+{
+ if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93)
+ return;
+
+ if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt)
+ drbd_send_delay_probes(mdev);
+}
+
+static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
+ drbd_send_delay_probes(mdev);
return 1;
}
+static void delay_probe_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ if (list_empty(&mdev->delay_probe_work.list))
+ drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
+}
+
/* Used to send write requests
* R_PRIMARY -> Peer (P_DATA)
*/
@@ -2357,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
p.dp_flags = cpu_to_be32(dp_flags);
set_bit(UNPLUG_REMOTE, &mdev->flags);
ok = (sizeof(p) ==
- drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+ drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
if (ok && dgs) {
dgb = mdev->int_dig_out;
- drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
- ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+ drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+ ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
}
if (ok) {
if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2371,6 +2458,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
}
drbd_put_data_sock(mdev);
+
+ if (ok)
+ consider_delay_probes(mdev);
+
return ok;
}
@@ -2406,16 +2497,20 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
return 0;
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
- sizeof(p), MSG_MORE);
+ sizeof(p), dgs ? MSG_MORE : 0);
if (ok && dgs) {
dgb = mdev->int_dig_out;
- drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb);
- ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+ drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
+ ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
}
if (ok)
- ok = _drbd_send_zc_bio(mdev, e->private_bio);
+ ok = _drbd_send_zc_ee(mdev, e);
drbd_put_data_sock(mdev);
+
+ if (ok)
+ consider_delay_probes(mdev);
+
return ok;
}
@@ -2628,16 +2723,24 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
INIT_LIST_HEAD(&mdev->unplug_work.list);
INIT_LIST_HEAD(&mdev->md_sync_work.list);
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+ INIT_LIST_HEAD(&mdev->delay_probes);
+ INIT_LIST_HEAD(&mdev->delay_probe_work.list);
+
mdev->resync_work.cb = w_resync_inactive;
mdev->unplug_work.cb = w_send_write_hint;
mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io;
+ mdev->delay_probe_work.cb = w_delay_probes;
init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer);
+ init_timer(&mdev->delay_probe_timer);
mdev->resync_timer.function = resync_timer_fn;
mdev->resync_timer.data = (unsigned long) mdev;
mdev->md_sync_timer.function = md_sync_timer_fn;
mdev->md_sync_timer.data = (unsigned long) mdev;
+ mdev->delay_probe_timer.function = delay_probe_timer_fn;
+ mdev->delay_probe_timer.data = (unsigned long) mdev;
+
init_waitqueue_head(&mdev->misc_wait);
init_waitqueue_head(&mdev->state_wait);
@@ -2680,7 +2783,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
drbd_set_my_capacity(mdev, 0);
if (mdev->bitmap) {
/* maybe never allocated. */
- drbd_bm_resize(mdev, 0);
+ drbd_bm_resize(mdev, 0, 1);
drbd_bm_cleanup(mdev);
}
@@ -3129,7 +3232,7 @@ int __init drbd_init(void)
if (err)
goto Enomem;
- drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops);
+ drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
if (!drbd_proc) {
printk(KERN_ERR "drbd: unable to register proc file\n");
goto Enomem;
@@ -3660,7 +3763,8 @@ _drbd_fault_str(unsigned int type) {
[DRBD_FAULT_DT_RD] = "Data read",
[DRBD_FAULT_DT_RA] = "Data read ahead",
[DRBD_FAULT_BM_ALLOC] = "BM allocation",
- [DRBD_FAULT_AL_EE] = "EE allocation"
+ [DRBD_FAULT_AL_EE] = "EE allocation",
+ [DRBD_FAULT_RECEIVE] = "receive data corruption",
};
return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
@@ -3679,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
if (ret) {
fault_count++;
- if (printk_ratelimit())
+ if (__ratelimit(&drbd_ratelimit_state))
dev_warn(DEV, "***Simulating %s failure\n",
_drbd_fault_str(type));
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6429d2b19e0..632e3245d1b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -510,7 +510,7 @@ void drbd_r