diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 22 | ||||
-rw-r--r-- | drivers/block/brd.c | 53 | ||||
-rw-r--r-- | drivers/block/cciss_scsi.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 21 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 149 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 154 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 52 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_proc.c | 19 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 689 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 94 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_strings.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 230 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_wrappers.h | 16 | ||||
-rw-r--r-- | drivers/block/loop.c | 4 | ||||
-rw-r--r-- | drivers/block/swim3.c | 8 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 50 | ||||
-rw-r--r-- | drivers/block/xsysace.c | 13 |
18 files changed, 1040 insertions, 539 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 77bfce52e9c..de277689da6 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -76,6 +76,17 @@ config BLK_DEV_XD It's pretty unlikely that you have one of these: say N. +config GDROM + tristate "SEGA Dreamcast GD-ROM drive" + depends on SH_DREAMCAST + help + A standard SEGA Dreamcast comes with a modified CD ROM drive called a + "GD-ROM" by SEGA to signify it is capable of reading special disks + with up to 1 GB of data. This drive will also read standard CD ROM + disks. Select this option to access any disks in your GD ROM drive. + Most users will want to say "Y" here. + You can also build this as a module which will be called gdrom. + config PARIDE tristate "Parallel port IDE device support" depends on PARPORT_PC @@ -103,17 +114,6 @@ config PARIDE "MicroSolutions backpack protocol", "DataStor Commuter protocol" etc.). -config GDROM - tristate "SEGA Dreamcast GD-ROM drive" - depends on SH_DREAMCAST - help - A standard SEGA Dreamcast comes with a modified CD ROM drive called a - "GD-ROM" by SEGA to signify it is capable of reading special disks - with up to 1 GB of data. This drive will also read standard CD ROM - disks. Select this option to access any disks in your GD ROM drive. - Most users will want to say "Y" here. - You can also build this as a module which will be called gdrom. - source "drivers/block/paride/Kconfig" config BLK_CPQ_DA diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 6081e81d573..f1bf79d9bc0 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) return page; } +static void brd_free_page(struct brd_device *brd, sector_t sector) +{ + struct page *page; + pgoff_t idx; + + spin_lock(&brd->brd_lock); + idx = sector >> PAGE_SECTORS_SHIFT; + page = radix_tree_delete(&brd->brd_pages, idx); + spin_unlock(&brd->brd_lock); + if (page) + __free_page(page); +} + +static void brd_zero_page(struct brd_device *brd, sector_t sector) +{ + struct page *page; + + page = brd_lookup_page(brd, sector); + if (page) + clear_highpage(page); +} + /* * Free all backing store pages and radix tree. This must only be called when * there are no other users of the device. @@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) return 0; } +static void discard_from_brd(struct brd_device *brd, + sector_t sector, size_t n) +{ + while (n >= PAGE_SIZE) { + /* + * Don't want to actually discard pages here because + * re-allocating the pages can result in writeback + * deadlocks under heavy load. + */ + if (0) + brd_free_page(brd, sector); + else + brd_zero_page(brd, sector); + sector += PAGE_SIZE >> SECTOR_SHIFT; + n -= PAGE_SIZE; + } +} + /* * Copy n bytes from src to the brd starting at sector. Does not sleep. */ @@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio) get_capacity(bdev->bd_disk)) goto out; + if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) { + err = 0; + discard_from_brd(brd, sector, bio->bi_size); + goto out; + } + rw = bio_rw(bio); if (rw == READA) rw = READ; @@ -320,7 +366,7 @@ out: } #ifdef CONFIG_BLK_DEV_XIP -static int brd_direct_access (struct block_device *bdev, sector_t sector, +static int brd_direct_access(struct block_device *bdev, sector_t sector, void **kaddr, unsigned long *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; @@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i) blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); + brd->brd_queue->limits.discard_granularity = PAGE_SIZE; + brd->brd_queue->limits.max_discard_sectors = UINT_MAX; + brd->brd_queue->limits.discard_zeroes_data = 1; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); + disk = brd->brd_disk = alloc_disk(1 << part_shift); if (!disk) goto out_free_queue; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1d0e2cfec7..3381505c8a6 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd) sa = h->scsi_ctlr; stk = &sa->cmd_stack; + stk->top++; if (stk->top >= CMD_STACK_SIZE) { printk("cciss: scsi_cmd_free called too many times.\n"); BUG(); } - stk->top++; stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3390716898d..e3f88d6e141 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -84,6 +84,9 @@ struct drbd_bitmap { #define BM_MD_IO_ERROR 1 #define BM_P_VMALLOCED 2 +static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, + unsigned long e, int val, const enum km_type km); + static int bm_is_locked(struct drbd_bitmap *b) { return test_bit(BM_LOCKED, &b->bm_flags); @@ -441,7 +444,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) * In case this is actually a resize, we copy the old bitmap into the new one. * Otherwise, the bitmap is initialized to all bits set. */ -int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) +int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) { struct drbd_bitmap *b = mdev->bitmap; unsigned long bits, words, owords, obits, *p_addr, *bm; @@ -516,7 +519,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) obits = b->bm_bits; growing = bits > obits; - if (opages) + if (opages && growing && set_new_bits) bm_set_surplus(b); b->bm_pages = npages; @@ -526,8 +529,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) b->bm_dev_capacity = capacity; if (growing) { - bm_memset(b, owords, 0xff, words-owords); - b->bm_set += bits - obits; + if (set_new_bits) { + bm_memset(b, owords, 0xff, words-owords); + b->bm_set += bits - obits; + } else + bm_memset(b, owords, 0x00, words-owords); + } if (want < have) { @@ -773,7 +780,7 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int /* nothing to do, on disk == in memory */ # define bm_cpu_to_lel(x) ((void)0) # else -void bm_cpu_to_lel(struct drbd_bitmap *b) +static void bm_cpu_to_lel(struct drbd_bitmap *b) { /* need to cpu_to_lel all the pages ... * this may be optimized by using @@ -1015,7 +1022,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f * wants bitnr, not sector. * expected to be called for only a few bits (e - s about BITS_PER_LONG). * Must hold bitmap lock already. */ -int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, +static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned long e, int val, const enum km_type km) { struct drbd_bitmap *b = mdev->bitmap; @@ -1053,7 +1060,7 @@ int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, * for val != 0, we change 0 -> 1, return code positive * for val == 0, we change 1 -> 0, return code negative * wants bitnr, not sector */ -int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, +static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, const unsigned long e, int val) { unsigned long flags; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e5e86a78182..485ed8c7d62 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -132,6 +132,7 @@ enum { DRBD_FAULT_DT_RA = 6, /* data read ahead */ DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ DRBD_FAULT_AL_EE = 8, /* alloc ee */ + DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */ DRBD_FAULT_MAX, }; @@ -208,8 +209,11 @@ enum drbd_packets { P_RS_IS_IN_SYNC = 0x22, /* meta socket */ P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ + /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ + /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ + P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ - P_MAX_CMD = 0x25, + P_MAX_CMD = 0x28, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, @@ -264,6 +268,7 @@ static inline const char *cmdname(enum drbd_packets cmd) [P_CSUM_RS_REQUEST] = "CsumRSRequest", [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", [P_COMPRESSED_BITMAP] = "CBitmap", + [P_DELAY_PROBE] = "DelayProbe", [P_MAX_CMD] = NULL, }; @@ -481,7 +486,8 @@ struct p_sizes { u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ u32 max_segment_size; /* Maximal size of a BIO */ - u32 queue_order_type; + u16 queue_order_type; /* not yet implemented in DRBD*/ + u16 dds_flags; /* use enum dds_flags here. */ } __packed; struct p_state { @@ -538,6 +544,18 @@ struct p_compressed_bm { u8 code[0]; } __packed; +struct p_delay_probe { + struct p_header head; + u32 seq_num; /* sequence number to match the two probe packets */ + u32 offset; /* usecs the probe got sent after the reference time point */ +} __packed; + +struct delay_probe { + struct list_head list; + unsigned int seq_num; + struct timeval time; +}; + /* DCBP: Drbd Compressed Bitmap Packet ... */ static inline enum drbd_bitmap_code DCBP_get_code(struct p_compressed_bm *p) @@ -722,22 +740,6 @@ enum epoch_event { EV_CLEANUP = 32, /* used as flag */ }; -struct drbd_epoch_entry { - struct drbd_work w; - struct drbd_conf *mdev; - struct bio *private_bio; - struct hlist_node colision; - sector_t sector; - unsigned int size; - struct drbd_epoch *epoch; - - /* up to here, the struct layout is identical to drbd_request; - * we might be able to use that to our advantage... */ - - unsigned int flags; - u64 block_id; -}; - struct drbd_wq_barrier { struct drbd_work w; struct completion done; @@ -748,17 +750,49 @@ struct digest_info { void *digest; }; -/* ee flag bits */ +struct drbd_epoch_entry { + struct drbd_work w; + struct hlist_node colision; + struct drbd_epoch *epoch; + struct drbd_conf *mdev; + struct page *pages; + atomic_t pending_bios; + unsigned int size; + /* see comments on ee flag bits below */ + unsigned long flags; + sector_t sector; + u64 block_id; +}; + +/* ee flag bits. + * While corresponding bios are in flight, the only modification will be + * set_bit WAS_ERROR, which has to be atomic. + * If no bios are in flight yet, or all have been completed, + * non-atomic modification to ee->flags is ok. + */ enum { __EE_CALL_AL_COMPLETE_IO, - __EE_CONFLICT_PENDING, __EE_MAY_SET_IN_SYNC, + + /* This epoch entry closes an epoch using a barrier. + * On sucessful completion, the epoch is released, + * and the P_BARRIER_ACK send. */ __EE_IS_BARRIER, + + /* In case a barrier failed, + * we need to resubmit without the barrier flag. */ + __EE_RESUBMITTED, + + /* we may have several bios per epoch entry. + * if any of those fail, we set this flag atomically + * from the endio callback */ + __EE_WAS_ERROR, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) -#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) +#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) +#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) /* global flag bits */ enum { @@ -908,9 +942,11 @@ struct drbd_conf { unsigned int ko_count; struct drbd_work resync_work, unplug_work, - md_sync_work; + md_sync_work, + delay_probe_work; struct timer_list resync_timer; struct timer_list md_sync_timer; + struct timer_list delay_probe_timer; /* Used after attach while negotiating new disk state. */ union drbd_state new_state_tmp; @@ -1026,6 +1062,12 @@ struct drbd_conf { u64 ed_uuid; /* UUID of the exposed data */ struct mutex state_mutex; char congestion_reason; /* Why we where congested... */ + struct list_head delay_probes; /* protected by peer_seq_lock */ + int data_delay; /* Delay of packets on the data-sock behind meta-sock */ + unsigned int delay_seq; /* To generate sequence numbers of delay probes */ + struct timeval dps_time; /* delay-probes-start-time */ + unsigned int dp_volume_last; /* send_cnt of last delay probe */ + int c_sync_rate; /* current resync rate after delay_probe magic */ }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -1081,6 +1123,11 @@ enum chg_state_flags { CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, }; +enum dds_flags { + DDSF_FORCED = 1, + DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ +}; + extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state mask, union drbd_state val); @@ -1113,7 +1160,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); -extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); +extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, @@ -1311,7 +1358,7 @@ struct bm_extent { #define APP_R_HSIZE 15 extern int drbd_bm_init(struct drbd_conf *mdev); -extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors); +extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); extern void drbd_bm_cleanup(struct drbd_conf *mdev); extern void drbd_bm_set_all(struct drbd_conf *mdev); extern void drbd_bm_clear_all(struct drbd_conf *mdev); @@ -1383,7 +1430,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local); +extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, @@ -1414,7 +1461,8 @@ static inline void ov_oos_print(struct drbd_conf *mdev) } -extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *); /* worker callbacks */ extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); @@ -1426,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); @@ -1438,6 +1485,8 @@ extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); /* drbd_receiver.c */ +extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, + const unsigned rw, const int fault_type); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, u64 id, @@ -1490,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock) static inline void drbd_tcp_quickack(struct socket *sock) { - int __user val = 1; + int __user val = 2; (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char __user *)&val, sizeof(val)); } @@ -1593,6 +1642,41 @@ void drbd_bcast_ee(struct drbd_conf *mdev, * inline helper functions *************************/ +/* see also page_chain_add and friends in drbd_receiver.c */ +static inline struct page *page_chain_next(struct page *page) +{ + return (struct page *)page_private(page); +} +#define page_chain_for_each(page) \ + for (; page && ({ prefetch(page_chain_next(page)); 1; }); \ + page = page_chain_next(page)) +#define page_chain_for_each_safe(page, n) \ + for (; page && ({ n = page_chain_next(page); 1; }); page = n) + +static inline int drbd_bio_has_active_page(struct bio *bio) +{ + struct bio_vec *bvec; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + if (page_count(bvec->bv_page) > 1) + return 1; + } + + return 0; +} + +static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) +{ + struct page *page = e->pages; + page_chain_for_each(page) { + if (page_count(page) > 1) + return 1; + } + return 0; +} + + static inline void drbd_state_lock(struct drbd_conf *mdev) { wait_event(mdev->misc_wait, @@ -1641,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, switch (mdev->ldev->dc.on_io_error) { case EP_PASS_ON: if (!forcedetach) { - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s." "Passing error on...\n", where); break; @@ -2138,7 +2222,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) /* I'd like to use wait_event_lock_irq, * but I'm not sure when it got introduced, * and not sure when it has 3 or 4 arguments */ -static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) +static inline void inc_ap_bio(struct drbd_conf *mdev, int count) { /* compare with after_state_ch, * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ @@ -2160,7 +2244,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) finish_wait(&mdev->misc_wait, &wait); spin_lock_irq(&mdev->req_lock); } - atomic_add(one_or_two, &mdev->ap_bio_cnt); + atomic_add(count, &mdev->ap_bio_cnt); spin_unlock_irq(&mdev->req_lock); } @@ -2251,7 +2335,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) if (test_bit(MD_NO_BARRIER, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); if (r) { set_bit(MD_NO_BARRIER, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 93d1f9b469d..6b077f93acc 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) rv = SS_NO_REMOTE_DISK; + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + else if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S || ns.conn == C_SYNC_SOURCE || @@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state break; case C_WF_BITMAP_S: case C_PAUSED_SYNC_S: - ns.pdsk = D_OUTDATED; + /* remap any consistent state to D_OUTDATED, + * but disallow "upgrade" of not even consistent states. + */ + ns.pdsk = + (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED) + ? os.pdsk : D_OUTDATED; break; case C_SYNC_SOURCE: ns.pdsk = D_INCONSISTENT; @@ -1205,8 +1213,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, && (ns.pdsk < D_INCONSISTENT || ns.pdsk == D_UNKNOWN || ns.pdsk == D_OUTDATED)) { - kfree(mdev->p_uuid); - mdev->p_uuid = NULL; if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { @@ -1232,7 +1238,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ - drbd_send_sizes(mdev, 0); /* to start sync... */ + drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); drbd_send_state(mdev); } @@ -1755,7 +1761,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) (struct p_header *)&p, sizeof(p)); } -int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) +int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) { struct p_sizes p; sector_t d_size, u_size; @@ -1767,7 +1773,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) d_size = drbd_get_max_capacity(mdev->ldev); u_size = mdev->ldev->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); - p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); put_ldev(mdev); } else { d_size = 0; @@ -1779,7 +1784,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); - p.queue_order_type = cpu_to_be32(q_order_type); + p.queue_order_type = cpu_to_be16(q_order_type); + p.dds_flags = cpu_to_be16(flags); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, (struct p_header *)&p, sizeof(p)); @@ -2180,6 +2186,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) return ok; } +static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds) +{ + struct p_delay_probe dp; + int offset, ok = 0; + struct timeval now; + + mutex_lock(&ds->mutex); + if (likely(ds->socket)) { + do_gettimeofday(&now); + offset = now.tv_usec - mdev->dps_time.tv_usec + + (now.tv_sec - mdev->dps_time.tv_sec) * 1000000; + dp.seq_num = cpu_to_be32(mdev->delay_seq); + dp.offset = cpu_to_be32(offset); + + ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE, + (struct p_header *)&dp, sizeof(dp), 0); + } + mutex_unlock(&ds->mutex); + + return ok; +} + +static int drbd_send_delay_probes(struct drbd_conf *mdev) +{ + int ok; + + mdev->delay_seq++; + do_gettimeofday(&mdev->dps_time); + ok = drbd_send_delay_probe(mdev, &mdev->meta); + ok = ok && drbd_send_delay_probe(mdev, &mdev->data); + + mdev->dp_volume_last = mdev->send_cnt; + mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10); + + return ok; +} + /* called on sndtimeo * returns FALSE if we should retry, * TRUE if we think connection is dead @@ -2229,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * * with page_count == 0 or PageSlab. */ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -2239,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, } static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { mm_segment_t oldfs = get_fs(); int sent, ok; @@ -2252,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size); + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); + msg_flags |= MSG_NOSIGNAL; drbd_update_congested(mdev); set_fs(KERNEL_DS); do { sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, len, - MSG_NOSIGNAL); + msg_flags); if (sent == -EAGAIN) { if (we_should_drop_the_connection(mdev, mdev->data.socket)) @@ -2288,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } return 1; @@ -2300,15 +2346,56 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) + return 0; + } + return 1; +} + +static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +{ + struct page *page = e->pages; + unsigned len = e->size; + /* hint all but last page with MSG_MORE */ + page_chain_for_each(page) { + unsigned l = min_t(unsigned, len, PAGE_SIZE); + if (!_drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0)) return 0; + len -= l; } + return 1; +} + +static void consider_delay_probes(struct drbd_conf *mdev) +{ + if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93) + return; + + if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt) + drbd_send_delay_probes(mdev); +} + +static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (!cancel && mdev->state.conn == C_SYNC_SOURCE) + drbd_send_delay_probes(mdev); return 1; } +static void delay_probe_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + + if (list_empty(&mdev->delay_probe_work.list)) + drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work); +} + /* Used to send write requests * R_PRIMARY -> Peer (P_DATA) */ @@ -2357,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) { if (mdev->net_conf->wire_protocol == DRBD_PROT_A) @@ -2371,6 +2458,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } drbd_put_data_sock(mdev); + + if (ok) + consider_delay_probes(mdev); + return ok; } @@ -2406,16 +2497,20 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, return 0; ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), MSG_MORE); + sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) - ok = _drbd_send_zc_bio(mdev, e->private_bio); + ok = _drbd_send_zc_ee(mdev, e); drbd_put_data_sock(mdev); + + if (ok) + consider_delay_probes(mdev); + return ok; } @@ -2628,16 +2723,24 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->md_sync_work.list); INIT_LIST_HEAD(&mdev->bm_io_work.w.list); + INIT_LIST_HEAD(&mdev->delay_probes); + INIT_LIST_HEAD(&mdev->delay_probe_work.list); + mdev->resync_work.cb = w_resync_inactive; mdev->unplug_work.cb = w_send_write_hint; mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; + mdev->delay_probe_work.cb = w_delay_probes; init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); + init_timer(&mdev->delay_probe_timer); mdev->resync_timer.function = resync_timer_fn; mdev->resync_timer.data = (unsigned long) mdev; mdev->md_sync_timer.function = md_sync_timer_fn; mdev->md_sync_timer.data = (unsigned long) mdev; + mdev->delay_probe_timer.function = delay_probe_timer_fn; + mdev->delay_probe_timer.data = (unsigned long) mdev; + init_waitqueue_head(&mdev->misc_wait); init_waitqueue_head(&mdev->state_wait); @@ -2680,7 +2783,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) drbd_set_my_capacity(mdev, 0); if (mdev->bitmap) { /* maybe never allocated. */ - drbd_bm_resize(mdev, 0); + drbd_bm_resize(mdev, 0, 1); drbd_bm_cleanup(mdev); } @@ -3129,7 +3232,7 @@ int __init drbd_init(void) if (err) goto Enomem; - drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); + drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); goto Enomem; @@ -3660,7 +3763,8 @@ _drbd_fault_str(unsigned int type) { [DRBD_FAULT_DT_RD] = "Data read", [DRBD_FAULT_DT_RA] = "Data read ahead", [DRBD_FAULT_BM_ALLOC] = "BM allocation", - [DRBD_FAULT_AL_EE] = "EE allocation" + [DRBD_FAULT_AL_EE] = "EE allocation", + [DRBD_FAULT_RECEIVE] = "receive data corruption", }; return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; @@ -3679,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) if (ret) { fault_count++; - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "***Simulating %s failure\n", _drbd_fault_str(type)); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6429d2b19e0..632e3245d1b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -510,7 +510,7 @@ void drbd_r |