diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-12 08:52:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-12 08:52:47 -0800 |
commit | 8a9f772c14f85e2a580baadc50c194835da2d4e5 (patch) | |
tree | 4ac04e465fa8295944f997fb517dc9904bb8e4f3 | |
parent | 25a34554d600b799cbf5159bef372b02d3b4e1c6 (diff) | |
parent | cedb4a7d9f6aedb0dce94d6285b69dcb3c10fa05 (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits)
block: remove unused copy_io_context()
Documentation: remove anticipatory scheduler info
block: remove REQ_HARDBARRIER
ioprio: rcu_read_lock/unlock protect find_task_by_vpid call (V2)
ioprio: fix RCU locking around task dereference
block: ioctl: fix information leak to userland
block: read i_size with i_size_read()
cciss: fix proc warning on attempt to remove non-existant directory
bio: take care not overflow page count when mapping/copying user data
block: limit vec count in bio_kmalloc() and bio_alloc_map_data()
block: take care not to overflow when calculating total iov length
block: check for proper length of iov entries in blk_rq_map_user_iov()
cciss: remove controllers supported by hpsa
cciss: use usleep_range not msleep for small sleeps
cciss: limit commands allocated on reset_devices
cciss: Use kernel provided PCI state save and restore functions
cciss: fix board status waiting code
drbd: Removed checks for REQ_HARDBARRIER on incomming BIOs
drbd: REQ_HARDBARRIER -> REQ_FUA transition for meta data accesses
drbd: Removed the BIO_RW_BARRIER support form the receiver/epoch code
...
34 files changed, 367 insertions, 519 deletions
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt index d5af3f63081..71cfbdc0f74 100644 --- a/Documentation/block/switching-sched.txt +++ b/Documentation/block/switching-sched.txt @@ -16,7 +16,7 @@ you can do so by typing: As of the Linux 2.6.10 kernel, it is now possible to change the IO scheduler for a given block device on the fly (thus making it possible, for instance, to set the CFQ scheduler for the system default, but -set a specific device to use the anticipatory or noop schedulers - which +set a specific device to use the deadline or noop schedulers - which can improve that device's throughput). To set a specific scheduler, simply do this: @@ -31,7 +31,7 @@ a "cat /sys/block/DEV/queue/scheduler" - the list of valid names will be displayed, with the currently selected scheduler in brackets: # cat /sys/block/hda/queue/scheduler -noop anticipatory deadline [cfq] -# echo anticipatory > /sys/block/hda/queue/scheduler +noop deadline [cfq] +# echo deadline > /sys/block/hda/queue/scheduler # cat /sys/block/hda/queue/scheduler -noop [anticipatory] deadline cfq +noop [deadline] cfq diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ed45e9802aa..92e83e53148 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -706,7 +706,7 @@ and is between 256 and 4096 characters. It is defined in the file arch/x86/kernel/cpu/cpufreq/elanfreq.c. elevator= [IOSCHED] - Format: {"anticipatory" | "cfq" | "deadline" | "noop"} + Format: {"cfq" | "deadline" | "noop"} See Documentation/block/as-iosched.txt and Documentation/block/deadline-iosched.txt for details. diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt index 221f38be98f..19f8278c385 100644 --- a/Documentation/rbtree.txt +++ b/Documentation/rbtree.txt @@ -21,8 +21,8 @@ three rotations, respectively, to balance the tree), with slightly slower To quote Linux Weekly News: There are a number of red-black trees in use in the kernel. - The anticipatory, deadline, and CFQ I/O schedulers all employ - rbtrees to track requests; the packet CD/DVD driver does the same. + The deadline and CFQ I/O schedulers employ rbtrees to + track requests; the packet CD/DVD driver does the same. The high-resolution timer code uses an rbtree to organize outstanding timer requests. The ext3 filesystem tracks directory entries in a red-black tree. Virtual memory areas (VMAs) are tracked with red-black diff --git a/block/blk-core.c b/block/blk-core.c index f0834e2f572..4ce953f1b39 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1194,13 +1194,6 @@ static int __make_request(struct request_queue *q, struct bio *bio) int where = ELEVATOR_INSERT_SORT; int rw_flags; - /* REQ_HARDBARRIER is no more */ - if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, - "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { - bio_endio(bio, -EOPNOTSUPP); - return 0; - } - /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1351,7 +1344,7 @@ static void handle_bad_sector(struct bio *bio) bdevname(bio->bi_bdev, b), bio->bi_rw, (unsigned long long)bio->bi_sector + bio_sectors(bio), - (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); + (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); set_bit(BIO_EOF, &bio->bi_flags); } @@ -1404,7 +1397,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; /* Test device or partition size, when known. */ - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; if (maxsector) { sector_t sector = bio->bi_sector; diff --git a/block/blk-ioc.c b/block/blk-ioc.c index d22c4c55c40..3c7a339fe38 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -153,20 +153,6 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node) } EXPORT_SYMBOL(get_io_context); -void copy_io_context(struct io_context **pdst, struct io_context **psrc) -{ - struct io_context *src = *psrc; - struct io_context *dst = *pdst; - - if (src) { - BUG_ON(atomic_long_read(&src->refcount) == 0); - atomic_long_inc(&src->refcount); - put_io_context(dst); - *pdst = src; - } -} -EXPORT_SYMBOL(copy_io_context); - static int __init blk_ioc_init(void) { iocontext_cachep = kmem_cache_create("blkdev_ioc", diff --git a/block/blk-map.c b/block/blk-map.c index d4a586d8691..5d5dbe47c22 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -205,6 +205,8 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unaligned = 1; break; } + if (!iov[i].iov_len) + return -EINVAL; } if (unaligned || (q->dma_pad_mask & len) || map_data) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 119f07b74dc..58c6ee5b010 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -744,13 +744,13 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: - size = bdev->bd_inode->i_size; + size = i_size_read(bdev->bd_inode); if ((size >> 9) > ~0UL) return -EFBIG; return compat_put_ulong(arg, size >> 9); case BLKGETSIZE64_32: - return compat_put_u64(arg, bdev->bd_inode->i_size); + return compat_put_u64(arg, i_size_read(bdev->bd_inode)); case BLKTRACESETUP32: case BLKTRACESTART: /* compatible */ diff --git a/block/elevator.c b/block/elevator.c index 282e8308f7e..2569512830d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -429,7 +429,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) q->nr_sorted--; boundary = q->end_sector; - stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED; + stop_flags = REQ_SOFTBARRIER | REQ_STARTED; list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); @@ -691,7 +691,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { diff --git a/block/ioctl.c b/block/ioctl.c index d724ceb1d46..3d866d0037f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -125,7 +125,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, start >>= 9; len >>= 9; - if (start + len > (bdev->bd_inode->i_size >> 9)) + if (start + len > (i_size_read(bdev->bd_inode) >> 9)) return -EINVAL; if (secure) flags |= BLKDEV_DISCARD_SECURE; @@ -242,6 +242,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, * We need to set the startsect first, the driver may * want to override it. */ + memset(&geo, 0, sizeof(geo)); geo.start = get_start_sect(bdev); ret = disk->fops->getgeo(bdev, &geo); if (ret) @@ -307,12 +308,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, ret = blkdev_reread_part(bdev); break; case BLKGETSIZE: - size = bdev->bd_inode->i_size; + size = i_size_read(bdev->bd_inode); if ((size >> 9) > ~0UL) return -EFBIG; return put_ulong(arg, size >> 9); case BLKGETSIZE64: - return put_u64(arg, bdev->bd_inode->i_size); + return put_u64(arg, i_size_read(bdev->bd_inode)); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACESETUP: diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a8b5a10eb5b..4f4230b79bb 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -321,33 +321,47 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, if (hdr->iovec_count) { const int size = sizeof(struct sg_iovec) * hdr->iovec_count; size_t iov_data_len; - struct sg_iovec *iov; + struct sg_iovec *sg_iov; + struct iovec *iov; + int i; - iov = kmalloc(size, GFP_KERNEL); - if (!iov) { + sg_iov = kmalloc(size, GFP_KERNEL); + if (!sg_iov) { ret = -ENOMEM; goto out; } - if (copy_from_user(iov, hdr->dxferp, size)) { - kfree(iov); + if (copy_from_user(sg_iov, hdr->dxferp, size)) { + kfree(sg_iov); ret = -EFAULT; goto out; } + /* + * Sum up the vecs, making sure they don't overflow + */ + iov = (struct iovec *) sg_iov; + iov_data_len = 0; + for (i = 0; i < hdr->iovec_count; i++) { + if (iov_data_len + iov[i].iov_len < iov_data_len) { + kfree(sg_iov); + ret = -EINVAL; + goto out; + } + iov_data_len += iov[i].iov_len; + } + /* SG_IO howto says that the shorter of the two wins */ - iov_data_len = iov_length((struct iovec *)iov, - hdr->iovec_count); if (hdr->dxfer_len < iov_data_len) { - hdr->iovec_count = iov_shorten((struct iovec *)iov, + hdr->iovec_count = iov_shorten(iov, hdr->iovec_count, hdr->dxfer_len); iov_data_len = hdr->dxfer_len; } - ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, + ret = blk_rq_map_user_iov(q, rq, NULL, sg_iov, hdr->iovec_count, iov_data_len, GFP_KERNEL); - kfree(iov); + kfree(sg_iov); } else if (hdr->dxfer_len) ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, GFP_KERNEL); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 541e1887996..528f6318ded 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -180,9 +180,6 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) BUG(); bio_endio(bio, -ENXIO); return 0; - } else if (bio->bi_rw & REQ_HARDBARRIER) { - bio_endio(bio, -EOPNOTSUPP); - return 0; } else if (bio->bi_io_vec == NULL) { printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); BUG(); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 2cc4dda4627..a67d0a611a8 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -113,6 +113,8 @@ static struct board_type products[] = { {0x409D0E11, "Smart Array 6400 EM", &SA5_access}, {0x40910E11, "Smart Array 6i", &SA5_access}, {0x3225103C, "Smart Array P600", &SA5_access}, + {0x3223103C, "Smart Array P800", &SA5_access}, + {0x3234103C, "Smart Array P400", &SA5_access}, {0x3235103C, "Smart Array P400i", &SA5_access}, {0x3211103C, "Smart Array E200i", &SA5_access}, {0x3212103C, "Smart Array E200", &SA5_access}, @@ -3753,7 +3755,7 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h) for (i = 0; i < MAX_CONFIG_WAIT; i++) { if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) break; - msleep(10); + usleep_range(10000, 20000); } } @@ -3937,10 +3939,9 @@ static int __devinit cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id) *board_id = ((subsystem_device_id << 16) & 0xffff0000) | subsystem_vendor_id; - for (i = 0; i < ARRAY_SIZE(products); i++) { + for (i = 0; i < ARRAY_SIZE(products); i++) if (*board_id == products[i].board_id) return i; - } dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n", *board_id); return -ENODEV; @@ -3971,18 +3972,31 @@ static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, return -ENODEV; } -static int __devinit cciss_wait_for_board_ready(ctlr_info_t *h) +static int __devinit cciss_wait_for_board_state(struct pci_dev *pdev, + void __iomem *vaddr, int wait_for_ready) +#define BOARD_READY 1 +#define BOARD_NOT_READY 0 { - int i; + int i, iterations; u32 scratchpad; - for (i = 0; i < CCISS_BOARD_READY_ITERATIONS; i++) { - scratchpad = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); - if (scratchpad == CCISS_FIRMWARE_READY) - return 0; + if (wait_for_ready) + iterations = CCISS_BOARD_READY_ITERATIONS; + else + iterations = CCISS_BOARD_NOT_READY_ITERATIONS; + + for (i = 0; i < iterations; i++) { + scratchpad = readl(vaddr + SA5_SCRATCHPAD_OFFSET); + if (wait_for_ready) { + if (scratchpad == CCISS_FIRMWARE_READY) + return 0; + } else { + if (scratchpad != CCISS_FIRMWARE_READY) + return 0; + } msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS); } - dev_warn(&h->pdev->dev, "board not ready, timed out.\n"); + dev_warn(&pdev->dev, "board not ready, timed out.\n"); return -ENODEV; } @@ -4031,6 +4045,11 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h) static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h) { h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands)); + + /* Limit commands in memory limited kdump scenario. */ + if (reset_devices && h->max_commands > 32) + h->max_commands = 32; + if (h->max_commands < 16) { dev_warn(&h->pdev->dev, "Controller reports " "max supported commands of %d, an obvious lie. " @@ -4148,7 +4167,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *h) err = -ENOMEM; goto err_out_free_res; } - err = cciss_wait_for_board_ready(h); + err = cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY); if (err) goto err_out_free_res; err = cciss_find_cfgtables(h); @@ -4313,36 +4332,6 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u #define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) #define cciss_noop(p) cciss_message(p, 3, 0) -static __devinit int cciss_reset_msi(struct pci_dev *pdev) -{ -/* the #defines are stolen from drivers/pci/msi.h. */ -#define msi_control_reg(base) (base + PCI_MSI_FLAGS) -#define PCI_MSIX_FLAGS_ENABLE (1 << 15) - - int pos; - u16 control = 0; - - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); - if (pos) { - pci_read_config_word(pdev, msi_control_reg(pos), &control); - if (control & PCI_MSI_FLAGS_ENABLE) { - dev_info(&pdev->dev, "resetting MSI\n"); - pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE); - } - } - - pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); - if (pos) { - pci_read_config_word(pdev, msi_control_reg(pos), &control); - if (control & PCI_MSIX_FLAGS_ENABLE) { - dev_info(&pdev->dev, "resetting MSI-X\n"); - pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE); - } - } - - return 0; -} - static int cciss_controller_hard_reset(struct pci_dev *pdev, void * __iomem vaddr, bool use_doorbell) { @@ -4397,17 +4386,17 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, * states or using the doorbell register. */ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) { - u16 saved_config_space[32]; u64 cfg_offset; u32 cfg_base_addr; u64 cfg_base_addr_index; void __iomem *vaddr; unsigned long paddr; u32 misc_fw_support, active_transport; - int rc, i; + int rc; CfgTable_struct __iomem *cfgtable; bool use_doorbell; u32 board_id; + u16 command_register; /* For controllers as old a the p600, this is very nearly * the same thing as @@ -4417,14 +4406,6 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) * pci_set_power_state(pci_dev, PCI_D0); * pci_restore_state(pci_dev); * - * but we can't use these nice canned kernel routines on - * kexec, because they also check the MSI/MSI-X state in PCI - * configuration space and do the wrong thing when it is - * set/cleared. Also, the pci_save/restore_state functions - * violate the ordering requirements for restoring the - * configuration space from the CCISS document (see the - * comment below). So we roll our own .... - * * For controllers newer than the P600, the pci power state * method of resetting doesn't work so we have another way * using the doorbell register. @@ -4443,8 +4424,13 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) return -ENODEV; } - for (i = 0; i < 32; i++) - pci_read_config_word(pdev, 2*i, &saved_config_space[i]); + /* Save the PCI command register */ + pci_read_config_word(pdev, 4, &command_register); + /* Turn the board off. This is so that later pci_restore_state() + * won't turn the board on before the rest of config space is ready. + */ + pci_disable_device(pdev); + pci_save_state(pdev); /* find the first memory BAR, so we can find the cfg table */ rc = cciss_pci_find_memory_BAR(pdev, &paddr); @@ -4479,26 +4465,32 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); if (rc) goto unmap_cfgtable; - - /* Restore the PCI configuration space. The Open CISS - * Specification says, "Restore the PCI Configuration - * Registers, offsets 00h through 60h. It is important to - * restore the command register, 16-bits at offset 04h, - * last. Do not restore the configuration status register, - * 16-bits at offset 06h." Note that the offset is 2*i. - */ - for (i = 0; i < 32; i++) { - if (i == 2 || i == 3) - continue; - pci_write_config_word(pdev, 2*i, saved_config_space[i]); + pci_restore_state(pdev); + rc = pci_enable_device(pdev); + if (rc) { + dev_warn(&pdev->dev, "failed to enable device.\n"); + goto unmap_cfgtable; } - wmb(); - pci_write_config_word(pdev, 4, saved_config_space[2]); + pci_write_config_word(pdev, 4, command_register); /* Some devices (notably the HP Smart Array 5i Controller) need a little pause here */ msleep(CCISS_POST_RESET_PAUSE_MSECS); + /* Wait for board to become not ready, then ready. */ + dev_info(&pdev->dev, "Waiting for board to become ready.\n"); + rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); + if (rc) /* Don't bail, might be E500, etc. which can't be reset */ + dev_warn(&pdev->dev, + "failed waiting for board to become not ready\n"); + rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); + if (rc) { + dev_warn(&pdev->dev, + "failed waiting for board to become ready\n"); + goto unmap_cfgtable; + } + dev_info(&pdev->dev, "board ready.\n"); + /* Controller should be in simple mode at this point. If it's not, * It means we're on one of those controllers which doesn't support * the doorbell reset method and on which the PCI power management reset @@ -4539,8 +4531,6 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) return 0; /* just try to do the kdump anyhow. */ if (rc) return -ENODEV; - if (cciss_reset_msi(pdev)) - return -ENODEV; /* Now try to get the controller to respond to a no-op */ for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) { @@ -4936,7 +4926,8 @@ static void __exit cciss_cleanup(void) } } kthread_stop(cciss_scan_thread); - remove_proc_entry("driver/cciss", NULL); + if (proc_cciss) + remove_proc_entry("driver/cciss", NULL); bus_unregister(&cciss_bus_type); } diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index ae340ffc8f8..4b8933d778f 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -200,10 +200,14 @@ struct ctlr_info * the above. */ #define CCISS_BOARD_READY_WAIT_SECS (120) +#define CCISS_BOARD_NOT_READY_WAIT_SECS (10) #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100) #define CCISS_BOARD_READY_ITERATIONS \ ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \ CCISS_BOARD_READY_POLL_INTERVAL_MSECS) +#define CCISS_BOARD_NOT_READY_ITERATIONS \ + ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \ + CCISS_BOARD_READY_POLL_INTERVAL_MSECS) #define CCISS_POST_RESET_PAUSE_MSECS (3000) #define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000) #define CCISS_POST_RESET_NOOP_RETRIES (12) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ac04ef97eac..ba95cba192b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -78,11 +78,10 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, init_completion(&md_io.event); md_io.error = 0; - if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags)) - rw |= REQ_HARDBARRIER; + if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) + rw |= REQ_FUA; rw |= REQ_UNPLUG | REQ_SYNC; - retry: bio = bio_alloc(GFP_NOIO, 1); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; @@ -100,17 +99,6 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, wait_for_completion(&md_io.event); ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; - /* check for unsupported barrier op. - * would rather check on EOPNOTSUPP, but that is not reliable. - * don't try again for ANY return value != 0 */ - if (unlikely((bio->bi_rw & REQ_HARDBARRIER) && !ok)) { - /* Try again with no barrier */ - dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); - set_bit(MD_NO_BARRIER, &mdev->flags); - rw &= ~REQ_HARDBARRIER; - bio_put(bio); - goto retry; - } out: bio_put(bio); return ok; @@ -284,18 +272,32 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) u32 xor_sum = 0; if (!get_ldev(mdev)) { - dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); + dev_err(DEV, + "disk is %s, cannot start al transaction (-%d +%d)\n", + drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); return 1; } /* do we have to do a bitmap write, first? * TODO reduce maximum latency: * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. */ + * instead of doing two synchronous sector writes. + * For now, we must not write the transaction, + * if we cannot write out the bitmap of the evicted extent. */ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); - mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ + /* The bitmap write may have failed, causing a state change. */ + if (mdev->state.disk < D_INCONSISTENT) { + dev_err(DEV, + "disk is %s, cannot write al transaction (-%d +%d)\n", + drbd_disk_str(mdev->state.disk), evicted, new_enr); + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } + + mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ buffer = (struct al_transaction *)page_address(mdev->md_io_page); buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); @@ -739,7 +741,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) unsigned int enr; unsigned long add = 0; char ppb[10]; - int i; + int i, tmp; wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -747,7 +749,9 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; - add += drbd_bm_ALe_set_all(mdev, enr); + tmp = drbd_bm_ALe_set_all(mdev, enr); + dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr); + add += tmp; } lc_unlock(mdev->act_log); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9bdcf4393c0..1ea1a34e78b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -114,11 +114,11 @@ struct drbd_conf; #define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) -#define ERR_IF(exp) if (({ \ - int _b = (exp) != 0; \ - if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ - __func__, #exp, __FILE__, __LINE__); \ - _b; \ +#define ERR_IF(exp) if (({ \ + int _b = (exp) != 0; \ + if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \ + __func__, #exp, __FILE__, __LINE__); \ + _b; \ })) /* Defines to control fault insertion */ @@ -749,17 +749,12 @@ struct drbd_epoch { /* drbd_epoch flag bits */ enum { - DE_BARRIER_IN_NEXT_EPOCH_ISSUED, - DE_BARRIER_IN_NEXT_EPOCH_DONE, - DE_CONTAINS_A_BARRIER, DE_HAVE_BARRIER_NUMBER, - DE_IS_FINISHING, }; enum epoch_event { EV_PUT, EV_GOT_BARRIER_NR, - EV_BARRIER_DONE, EV_BECAME_LAST, EV_CLEANUP = 32, /* used as flag */ }; @@ -801,11 +796,6 @@ enum { __EE_CALL_AL_COMPLETE_IO, __EE_MAY_SET_IN_SYNC, - /* This epoch entry closes an epoch using a barrier. - * On sucessful completion, the epoch is released, - * and the P_BARRIER_ACK send. */ - __EE_IS_BARRIER, - /* In case a barrier failed, * we need to resubmit without the barrier flag. */ __EE_RESUBMITTED, @@ -820,7 +810,6 @@ enum { }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) -#define EE_IS_BARRIER (1<<__EE_IS_BARRIER) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) @@ -843,16 +832,15 @@ enum { * Gets cleared when the state.conn * goes into C_CONNECTED state. */ WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ - NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ CONSIDER_RESYNC, - MD_NO_BARRIER, /* meta data device does not support barriers, - so don't even try */ + MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ SUSPEND_IO, /* suspend application io */ BITMAP_IO, /* suspend application io; once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ - GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */ |