diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 23 | ||||
-rw-r--r-- | drivers/md/Makefile | 7 | ||||
-rw-r--r-- | drivers/md/dm-emc.c | 345 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.c | 213 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.h | 63 | ||||
-rw-r--r-- | drivers/md/dm-mpath-hp-sw.c | 247 | ||||
-rw-r--r-- | drivers/md/dm-mpath-rdac.c | 700 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 163 | ||||
-rw-r--r-- | drivers/md/dm-mpath.h | 1 | ||||
-rw-r--r-- | drivers/md/linear.c | 10 | ||||
-rw-r--r-- | drivers/md/raid0.c | 10 | ||||
-rw-r--r-- | drivers/md/raid10.c | 15 | ||||
-rw-r--r-- | drivers/md/raid5.c | 17 |
13 files changed, 123 insertions, 1691 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 610af916891..07d92c11b5d 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -252,27 +252,14 @@ config DM_ZERO config DM_MULTIPATH tristate "Multipath target" depends on BLK_DEV_DM + # nasty syntax but means make DM_MULTIPATH independent + # of SCSI_DH if the latter isn't defined but if + # it is, DM_MULTIPATH must depend on it. We get a build + # error if SCSI_DH=m and DM_MULTIPATH=y + depends on SCSI_DH || !SCSI_DH ---help--- Allow volume managers to support multipath hardware. -config DM_MULTIPATH_EMC - tristate "EMC CX/AX multipath support" - depends on DM_MULTIPATH && BLK_DEV_DM - ---help--- - Multipath support for EMC CX/AX series hardware. - -config DM_MULTIPATH_RDAC - tristate "LSI/Engenio RDAC multipath support (EXPERIMENTAL)" - depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL - ---help--- - Multipath support for LSI/Engenio RDAC. - -config DM_MULTIPATH_HP - tristate "HP MSA multipath support (EXPERIMENTAL)" - depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL - ---help--- - Multipath support for HP MSA (Active/Passive) series hardware. - config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 7be09eeea29..f1ef33dfd8c 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -4,11 +4,9 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o -dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o +dm-multipath-objs := dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-raid1.o -dm-rdac-objs := dm-mpath-rdac.o -dm-hp-sw-objs := dm-mpath-hp-sw.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ raid6int1.o raid6int2.o raid6int4.o \ @@ -35,9 +33,6 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o -obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o -obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o -obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o obj-$(CONFIG_DM_ZERO) += dm-zero.o diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c deleted file mode 100644 index 3ea5ad4b780..00000000000 --- a/drivers/md/dm-emc.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath support for EMC CLARiiON AX/CX-series hardware. - */ - -#include "dm.h" -#include "dm-hw-handler.h" -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> - -#define DM_MSG_PREFIX "multipath emc" - -struct emc_handler { - spinlock_t lock; - - /* Whether we should send the short trespass command (FC-series) - * or the long version (default for AX/CX CLARiiON arrays). */ - unsigned short_trespass; - /* Whether or not to honor SCSI reservations when initiating a - * switch-over. Default: Don't. */ - unsigned hr; - - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; -}; - -#define TRESPASS_PAGE 0x22 -#define EMC_FAILOVER_TIMEOUT (60 * HZ) - -/* Code borrowed from dm-lsi-rdac by Mike Christie */ - -static inline void free_bio(struct bio *bio) -{ - __free_page(bio->bi_io_vec[0].bv_page); - bio_put(bio); -} - -static void emc_endio(struct bio *bio, int error) -{ - struct dm_path *path = bio->bi_private; - - /* We also need to look at the sense keys here whether or not to - * switch to the next PG etc. - * - * For now simple logic: either it works or it doesn't. - */ - if (error) - dm_pg_init_complete(path, MP_FAIL_PATH); - else - dm_pg_init_complete(path, 0); - - /* request is freed in block layer */ - free_bio(bio); -} - -static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size) -{ - struct bio *bio; - struct page *page; - - bio = bio_alloc(GFP_ATOMIC, 1); - if (!bio) { - DMERR("get_failover_bio: bio_alloc() failed."); - return NULL; - } - - bio->bi_rw |= (1 << BIO_RW); - bio->bi_bdev = path->dev->bdev; - bio->bi_sector = 0; - bio->bi_private = path; - bio->bi_end_io = emc_endio; - - page = alloc_page(GFP_ATOMIC); - if (!page) { - DMERR("get_failover_bio: alloc_page() failed."); - bio_put(bio); - return NULL; - } - - if (bio_add_page(bio, page, data_size, 0) != data_size) { - DMERR("get_failover_bio: bio_add_page() failed."); - __free_page(page); - bio_put(bio); - return NULL; - } - - return bio; -} - -static struct request *get_failover_req(struct emc_handler *h, - struct bio *bio, struct dm_path *path) -{ - struct request *rq; - struct block_device *bdev = bio->bi_bdev; - struct request_queue *q = bdev_get_queue(bdev); - - /* FIXME: Figure out why it fails with GFP_ATOMIC. */ - rq = blk_get_request(q, WRITE, __GFP_WAIT); - if (!rq) { - DMERR("get_failover_req: blk_get_request failed"); - return NULL; - } - - blk_rq_append_bio(q, rq, bio); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - - rq->timeout = EMC_FAILOVER_TIMEOUT; - rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - - return rq; -} - -static struct request *emc_trespass_get(struct emc_handler *h, - struct dm_path *path) -{ - struct bio *bio; - struct request *rq; - unsigned char *page22; - unsigned char long_trespass_pg[] = { - 0, 0, 0, 0, - TRESPASS_PAGE, /* Page code */ - 0x09, /* Page length - 2 */ - h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */ - 0xff, 0xff, /* Trespass target */ - 0, 0, 0, 0, 0, 0 /* Reserved bytes / unknown */ - }; - unsigned char short_trespass_pg[] = { - 0, 0, 0, 0, - TRESPASS_PAGE, /* Page code */ - 0x02, /* Page length - 2 */ - h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */ - 0xff, /* Trespass target */ - }; - unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) : - sizeof(long_trespass_pg); - - /* get bio backing */ - if (data_size > PAGE_SIZE) - /* this should never happen */ - return NULL; - - bio = get_failover_bio(path, data_size); - if (!bio) { - DMERR("emc_trespass_get: no bio"); - return NULL; - } - - page22 = (unsigned char *)bio_data(bio); - memset(page22, 0, data_size); - - memcpy(page22, h->short_trespass ? - short_trespass_pg : long_trespass_pg, data_size); - - /* get request for block layer packet command */ - rq = get_failover_req(h, bio, path); - if (!rq) { - DMERR("emc_trespass_get: no rq"); - free_bio(bio); - return NULL; - } - - /* Prepare the command. */ - rq->cmd[0] = MODE_SELECT; - rq->cmd[1] = 0x10; - rq->cmd[4] = data_size; - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - return rq; -} - -static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(path->dev->bdev); - - /* - * We can either blindly init the pg (then look at the sense), - * or we can send some commands to get the state here (then - * possibly send the fo cmnd), or we can also have the - * initial state passed into us and then get an update here. - */ - if (!q) { - DMINFO("emc_pg_init: no queue"); - goto fail_path; - } - - /* FIXME: The request should be pre-allocated. */ - rq = emc_trespass_get(hwh->context, path); - if (!rq) { - DMERR("emc_pg_init: no rq"); - goto fail_path; - } - - DMINFO("emc_pg_init: sending switch-over command"); - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); - return; - -fail_path: - dm_pg_init_complete(path, MP_FAIL_PATH); -} - -static struct emc_handler *alloc_emc_handler(void) -{ - struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL); - - if (h) - spin_lock_init(&h->lock); - - return h; -} - -static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct emc_handler *h; - unsigned hr, short_trespass; - - if (argc == 0) { - /* No arguments: use defaults */ - hr = 0; - short_trespass = 0; - } else if (argc != 2) { - DMWARN("incorrect number of arguments"); - return -EINVAL; - } else { - if ((sscanf(argv[0], "%u", &short_trespass) != 1) - || (short_trespass > 1)) { - DMWARN("invalid trespass mode selected"); - return -EINVAL; - } - - if ((sscanf(argv[1], "%u", &hr) != 1) - || (hr > 1)) { - DMWARN("invalid honor reservation flag selected"); - return -EINVAL; - } - } - - h = alloc_emc_handler(); - if (!h) - return -ENOMEM; - - hwh->context = h; - - if ((h->short_trespass = short_trespass)) - DMWARN("short trespass command will be send"); - else - DMWARN("long trespass command will be send"); - - if ((h->hr = hr)) - DMWARN("honor reservation bit will be set"); - else - DMWARN("honor reservation bit will not be set (default)"); - - return 0; -} - -static void emc_destroy(struct hw_handler *hwh) -{ - struct emc_handler *h = (struct emc_handler *) hwh->context; - - kfree(h); - hwh->context = NULL; -} - -static unsigned emc_error(struct hw_handler *hwh, struct bio *bio) -{ - /* FIXME: Patch from axboe still missing */ -#if 0 - int sense; - - if (bio->bi_error & BIO_SENSE) { - sense = bio->bi_error & 0xffffff; /* sense key / asc / ascq */ - - if (sense == 0x020403) { - /* LUN Not Ready - Manual Intervention Required - * indicates this is a passive path. - * - * FIXME: However, if this is seen and EVPD C0 - * indicates that this is due to a NDU in - * progress, we should set FAIL_PATH too. - * This indicates we might have to do a SCSI - * inquiry in the end_io path. Ugh. */ - return MP_BYPASS_PG | MP_RETRY_IO; - } else if (sense == 0x052501) { - /* An array based copy is in progress. Do not - * fail the path, do not bypass to another PG, - * do not retry. Fail the IO immediately. - * (Actually this is the same conclusion as in - * the default handler, but lets make sure.) */ - return 0; - } else if (sense == 0x062900) { - /* Unit Attention Code. This is the first IO - * to the new path, so just retry. */ - return MP_RETRY_IO; - } - } -#endif - - /* Try default handler */ - return dm_scsi_err_handler(hwh, bio); -} - -static struct hw_handler_type emc_hwh = { - .name = "emc", - .module = THIS_MODULE, - .create = emc_create, - .destroy = emc_destroy, - .pg_init = emc_pg_init, - .error = emc_error, -}; - -static int __init dm_emc_init(void) -{ - int r = dm_register_hw_handler(&emc_hwh); - - if (r < 0) - DMERR("register failed %d", r); - - DMINFO("version 0.0.3 loaded"); - - return r; -} - -static void __exit dm_emc_exit(void) -{ - int r = dm_unregister_hw_handler(&emc_hwh); - - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(dm_emc_init); -module_exit(dm_emc_exit); - -MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath"); -MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c deleted file mode 100644 index 2ee84d8aa0b..00000000000 --- a/drivers/md/dm-hw-handler.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath hardware handler registration. - */ - -#include "dm.h" -#include "dm-hw-handler.h" - -#include <linux/slab.h> - -struct hwh_internal { - struct hw_handler_type hwht; - - struct list_head list; - long use; -}; - -#define hwht_to_hwhi(__hwht) container_of((__hwht), struct hwh_internal, hwht) - -static LIST_HEAD(_hw_handlers); -static DECLARE_RWSEM(_hwh_lock); - -static struct hwh_internal *__find_hw_handler_type(const char *name) -{ - struct hwh_internal *hwhi; - - list_for_each_entry(hwhi, &_hw_handlers, list) { - if (!strcmp(name, hwhi->hwht.name)) - return hwhi; - } - - return NULL; -} - -static struct hwh_internal *get_hw_handler(const char *name) -{ - struct hwh_internal *hwhi; - - down_read(&_hwh_lock); - hwhi = __find_hw_handler_type(name); - if (hwhi) { - if ((hwhi->use == 0) && !try_module_get(hwhi->hwht.module)) - hwhi = NULL; - else - hwhi->use++; - } - up_read(&_hwh_lock); - - return hwhi; -} - -struct hw_handler_type *dm_get_hw_handler(const char *name) -{ - struct hwh_internal *hwhi; - - if (!name) - return NULL; - - hwhi = get_hw_handler(name); - if (!hwhi) { - request_module("dm-%s", name); - hwhi = get_hw_handler(name); - } - - return hwhi ? &hwhi->hwht : NULL; -} - -void dm_put_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi; - - if (!hwht) - return; - - down_read(&_hwh_lock); - hwhi = __find_hw_handler_type(hwht->name); - if (!hwhi) - goto out; - - if (--hwhi->use == 0) - module_put(hwhi->hwht.module); - - BUG_ON(hwhi->use < 0); - - out: - up_read(&_hwh_lock); -} - -static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL); - - if (hwhi) - hwhi->hwht = *hwht; - - return hwhi; -} - -int dm_register_hw_handler(struct hw_handler_type *hwht) -{ - int r = 0; - struct hwh_internal *hwhi = _alloc_hw_handler(hwht); - - if (!hwhi) - return -ENOMEM; - - down_write(&_hwh_lock); - - if (__find_hw_handler_type(hwht->name)) { - kfree(hwhi); - r = -EEXIST; - } else - list_add(&hwhi->list, &_hw_handlers); - - up_write(&_hwh_lock); - - return r; -} - -int dm_unregister_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi; - - down_write(&_hwh_lock); - - hwhi = __find_hw_handler_type(hwht->name); - if (!hwhi) { - up_write(&_hwh_lock); - return -EINVAL; - } - - if (hwhi->use) { - up_write(&_hwh_lock); - return -ETXTBSY; - } - - list_del(&hwhi->list); - - up_write(&_hwh_lock); - - kfree(hwhi); - - return 0; -} - -unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio) -{ -#if 0 - int sense_key, asc, ascq; - - if (bio->bi_error & BIO_SENSE) { - /* FIXME: This is just an initial guess. */ - /* key / asc / ascq */ - sense_key = (bio->bi_error >> 16) & 0xff; - asc = (bio->bi_error >> 8) & 0xff; - ascq = bio->bi_error & 0xff; - - switch (sense_key) { - /* This block as a whole comes from the device. - * So no point retrying on another path. */ - case 0x03: /* Medium error */ - case 0x05: /* Illegal request */ - case 0x07: /* Data protect */ - case 0x08: /* Blank check */ - case 0x0a: /* copy aborted */ - case 0x0c: /* obsolete - no clue ;-) */ - case 0x0d: /* volume overflow */ - case 0x0e: /* data miscompare */ - case 0x0f: /* reserved - no idea either. */ - return MP_ERROR_IO; - - /* For these errors it's unclear whether they - * come from the device or the controller. - * So just lets try a different path, and if - * it eventually succeeds, user-space will clear - * the paths again... */ - case 0x02: /* Not ready */ - case 0x04: /* Hardware error */ - case 0x09: /* vendor specific */ - case 0x0b: /* Aborted command */ - return MP_FAIL_PATH; - - case 0x06: /* Unit attention - might want to decode */ - if (asc == 0x04 && ascq == 0x01) - /* "Unit in the process of - * becoming ready" */ - return 0; - return MP_FAIL_PATH; - - /* FIXME: For Unit Not Ready we may want - * to have a generic pg activation - * feature (START_UNIT). */ - - /* Should these two ever end up in the - * error path? I don't think so. */ - case 0x00: /* No sense */ - case 0x01: /* Recovered error */ - return 0; - } - } -#endif - - /* We got no idea how to decode the other kinds of errors -> - * assume generic error condition. */ - return MP_FAIL_PATH; -} - -EXPORT_SYMBOL_GPL(dm_register_hw_handler); -EXPORT_SYMBOL_GPL(dm_unregister_hw_handler); -EXPORT_SYMBOL_GPL(dm_scsi_err_handler); diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h deleted file mode 100644 index 46809dcb121..00000000000 --- a/drivers/md/dm-hw-handler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath hardware handler registration. - */ - -#ifndef DM_HW_HANDLER_H -#define DM_HW_HANDLER_H - -#include <linux/device-mapper.h> - -#include "dm-mpath.h" - -struct hw_handler_type; -struct hw_handler { - struct hw_handler_type *type; - struct mapped_device *md; - void *context; -}; - -/* - * Constructs a hardware handler object, takes custom arguments - */ -/* Information about a hardware handler type */ -struct hw_handler_type { - char *name; - struct module *module; - - int (*create) (struct hw_handler *handler, unsigned int argc, - char **argv); - void (*destroy) (struct hw_handler *hwh); - - void (*pg_init) (struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path); - unsigned (*error) (struct hw_handler *hwh, struct bio *bio); - int (*status) (struct hw_handler *hwh, status_type_t type, - char *result, unsigned int maxlen); -}; - -/* Register a hardware handler */ -int dm_register_hw_handler(struct hw_handler_type *type); - -/* Unregister a hardware handler */ -int dm_unregister_hw_handler(struct hw_handler_type *type); - -/* Returns a registered hardware handler type */ -struct hw_handler_type *dm_get_hw_handler(const char *name); - -/* Releases a hardware handler */ -void dm_put_hw_handler(struct hw_handler_type *hwht); - -/* Default err function */ -unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio); - -/* Error flags for err and dm_pg_init_complete */ -#define MP_FAIL_PATH 1 -#define MP_BYPASS_PG 2 -#define MP_ERROR_IO 4 /* Don't retry this I/O */ -#define MP_RETRY 8 - -#endif diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c deleted file mode 100644 index b63a0ab37c5..00000000000 --- a/drivers/md/dm-mpath-hp-sw.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (C) 2005 Mike Christie, All rights reserved. - * Copyright (C) 2007 Red Hat, Inc. All rights reserved. - * Authors: Mike Christie - * Dave Wysochanski - * - * This file is released under the GPL. - * - * This module implements the specific path activation code for - * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive) - * storage arrays. - * These storage arrays have controller-based failover, not - * LUN-based failover. However, LUN-based failover is the design - * of dm-multipath. Thus, this module is written for LUN-based failover. - */ -#include <linux/blkdev.h> -#include <linux/list.h> -#include <linux/types.h> -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_dbg.h> - -#include "dm.h" -#include "dm-hw-handler.h" - -#define DM_MSG_PREFIX "multipath hp-sw" -#define DM_HP_HWH_NAME "hp-sw" -#define DM_HP_HWH_VER "1.0.0" - -struct hp_sw_context { - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; -}; - -/* - * hp_sw_error_is_retryable - Is an HP-specific check condition retryable? - * @req: path activation request - * - * Examine error codes of request and determine whether the error is retryable. - * Some error codes are already retried by scsi-ml (see - * scsi_decide_disposition), but some HP specific codes are not. - * The intent of this routine is to supply the logic for the HP specific - * check conditions. - * - * Returns: - * 1 - command completed with retryable error - * 0 - command completed with non-retryable error - * - * Possible optimizations - * 1. More hardware-specific error codes - */ -static int hp_sw_error_is_retryable(struct request *req) -{ - /* - * NOT_READY is known to be retryable - * For now we just dump out the sense data and call it retryable - */ - if (status_byte(req->errors) == CHECK_CONDITION) - __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len); - - /* - * At this point we don't have complete information about all the error - * codes from this hardware, so we are just conservative and retry - * when in doubt. - */ - return 1; -} - -/* - * hp_sw_end_io - Completion handler for HP path activation. - * @req: path activation request - * @error: scsi-ml error - * - * Check sense data, free request structure, and notify dm that - * pg initialization has completed. - * - * Context: scsi-ml softirq - * - */ -static void hp_sw_end_io(struct request *req, int error) -{ - struct dm_path *path = req->end_io_data; - unsigned err_flags = 0; - - if (!error) { - DMDEBUG("%s path activation command - success", - path->dev->name); - goto out; - } - - if (hp_sw_error_is_retryable(req)) { - DMDEBUG("%s path activation command - retry", - path->dev->name); - err_flags = MP_RETRY; - goto out; - } - - DMWARN("%s path activation fail - error=0x%x", - path->dev->name, error); - err_flags = MP_FAIL_PATH; - -out: - req->end_io_data = NULL; - __blk_put_request(req->q, req); - dm_pg_init_complete(path, err_flags); -} - -/* - * hp_sw_get_request - Allocate an HP specific path activation request - * @path: path on which request will be sent (needed for request queue) - * - * The START command is used for path activation request. - * These arrays are controller-based failover, not LUN based. - * One START command issued to a single path will fail over all - * LUNs for the same controller. - * - * Possible optimizations - * 1. Make timeout configurable - * 2. Preallocate request - */ -static struct request *hp_sw_get_request(struct dm_path *path) -{ - struct request *req; - struct block_device *bdev = path->dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct hp_sw_context *h = path->hwhcontext; - - req = blk_get_request(q, WRITE, GFP_NOIO); - if (!req) - goto out; - - req->timeout = 60 * HZ; - - req->errors = 0; - req->cmd_type = REQ_TYPE_BLOCK_PC; - req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - req->end_io_data = path; - req->sense = h->sense; - memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); - - req->cmd[0] = START_STOP; - req->cmd[4] = 1; - req->cmd_len = COMMAND_SIZE(req->cmd[0]); - -out: - return req; -} - -/* - * hp_sw_pg_init - HP path activation implementation. - * @hwh: hardware handler specific data - * @bypassed: unused; is the path group bypassed? (see dm-mpath.c) - * @path: path to send initialization command - * - * Send an HP-specific path activation command on 'path'. - * Do not try to optimize in any way, just send the activation command. - * More than one path activation command may be sent to the same controller. - * This seems to work fine for basic failover support. - * - * Possible optimizations - * 1. Detect an in-progress activation request and avoid submitting another one - * 2. Model the controller and only send a single activation request at a time - * 3. Determine the state of a path before sending an activation request - * - * Context: kmpathd (see process_queued_ios() in dm-mpath.c) - */ -static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct request *req; - struct hp_sw_context *h; - - path->hwhcontext = hwh->context; - h = hwh->context; - - req = hp_sw_get_request(path); - if (!req) { - DMERR("%s path activation command - allocation fail", - path->dev->name); - goto retry; - } - - DMDEBUG("%s path activation command - sent", path->dev->name); - - blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io); - return; - -retry: - dm_pg_init_complete(path, MP_RETRY); -} - -static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct hp_sw_context *h; - - h = kmalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - hwh->context = h; - - return 0; -} - -static void hp_sw_destroy(struct hw_handler *hwh) -{ - struct hp_sw_context *h = hwh->context; - - kfree(h); -} - -static struct hw_handler_type hp_sw_hwh = { - .name = DM_HP_HWH_NAME, - .module = THIS_MODULE, - .create = hp_sw_create, - .destroy = hp_sw_destroy, - .pg_init = hp_sw_pg_init, -}; - -static int __init hp_sw_init(void) -{ - int r; - - r = dm_register_hw_handler(&hp_sw_hwh); - if (r < 0) - DMERR("register failed %d", r); - else - DMINFO("version " DM_HP_HWH_VER " loaded"); - - return r; -} - -static void __exit hp_sw_exit(void) -{ - int r; - - r = dm_unregister_hw_handler(&hp_sw_hwh); - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(hp_sw_init); -module_exit(hp_sw_exit); - -MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support"); -MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DM_HP_HWH_VER); diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c deleted file mode 100644 index 95e77734880..00000000000 --- a/drivers/md/dm-mpath-rdac.c +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Engenio/LSI RDAC DM HW handler - * - * Copyright (C) 2005 Mike Christie. All rights reserved. - * Copyright (C) Chandra Seetharaman, IBM Corp. 2007 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_eh.h> - -#define DM_MSG_PREFIX "multipath rdac" - -#include "dm.h" -#include "dm-hw-handler.h" - -#define RDAC_DM_HWH_NAME "rdac" -#define RDAC_DM_HWH_VER "0.4" - -/* - * LSI mode page stuff - * - * These struct definitions and the forming of the - * mode page were taken from the LSI RDAC 2.4 GPL'd - * driver, and then converted to Linux conventions. - */ -#define RDAC_QUIESCENCE_TIME 20; -/* - * Page Codes - */ -#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c - -/* - * Controller modes definitions - */ -#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01 -#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02 - -/* - * RDAC Options field - */ -#define RDAC_FORCED_QUIESENCE 0x02 - -#define RDAC_FAILOVER_TIMEOUT (60 * HZ) - -struct rdac_mode_6_hdr { - u8 data_len; - u8 medium_type; - u8 device_params; - u8 block_desc_len; -}; - -struct rdac_mode_10_hdr { - u16 data_len; - u8 medium_type; - u8 device_params; - u16 reserved; - u16 block_desc_len; -}; - -struct rdac_mode_common { - u8 controller_serial[16]; - u8 alt_controller_serial[16]; - u8 rdac_mode[2]; - u8 alt_rdac_mode[2]; - u8 quiescence_timeout; - u8 rdac_options; -}; - -struct rdac_pg_legacy { - struct rdac_mode_6_hdr hdr; - u8 page_code; - u8 page_len; - struct rdac_mode_common common; -#define MODE6_MAX_LUN 32 - u8 lun_table[MODE6_MAX_LUN]; - u8 reserved2[32]; - u8 reserved3; - u8 reserved4; -}; - -struct rdac_pg_expanded { - struct rdac_mode_10_hdr hdr; - u8 page_code; - u8 subpage_code; - u8 page_len[2]; - struct rdac_mode_common common; - u8 lun_table[256]; - u8 reserved3; - u8 reserved4; -}; - -struct c9_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC9 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "vace" */ - u8 avte_cvp; - u8 path_prio; - u8 reserved2[38]; -}; - -#define SUBSYS_ID_LEN 16 -#define SLOT_ID_LEN 2 - -struct c4_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC4 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "subs" */ - u8 subsys_id[SUBSYS_ID_LEN]; - u8 revision[4]; - u8 slot_id[SLOT_ID_LEN]; - u8 reserved[2]; -}; - -struct rdac_controller { - u8 subsys_id[SUBSYS_ID_LEN]; - u8 slot_id[SLOT_ID_LEN]; - int use_10_ms; - struct kref kref; - struct list_head node; /* list of all controllers */ - spinlock_t lock; - int submitted; - struct list_head cmd_list; /* list of commands to be submitted */ - union { - struct rdac_pg_legacy legacy; - struct rdac_pg_expanded expanded; - } mode_select; -}; -struct c8_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC8 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "edid" */ - u8 reserved2[3]; - u8 vol_uniq_id_len; - u8 vol_uniq_id[16]; - u8 vol_user_label_len; - u8 vol_user_label[60]; - u8 array_uniq_id_len; - u8 array_unique_id[16]; - u8 array_user_label_len; - u8 array_user_label[60]; - u8 lun[8]; -}; - -struct c2_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC2 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "swr4" */ - u8 sw_version[3]; - u8 sw_date[3]; - u8 features_enabled; - u8 max_lun_supported; - u8 partitions[239]; /* Total allocation length should be 0xFF */ -}; - -struct rdac_handler { - struct list_head entry; /* list waiting to submit MODE SELECT */ - unsigned timeout; - struct rdac_controller *ctlr; -#define UNINITIALIZED_LUN (1 << 8) - unsigned lun; - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; - struct dm_path *path; - struct work_struct work; -#define SEND_C2_INQUIRY 1 -#define SEND_C4_INQUIRY 2 -#define SEND_C8_INQUIRY 3 -#define SEND_C9_INQUIRY 4 -#define SEND_MODE_SELECT 5 - int cmd_to_send; - union { - struct c2_inquiry c2; - struct c4_inquiry c4; - struct c8_inquiry c8; - struct c9_inquiry c9; - } inq; -}; - -static LIST_HEAD(ctlr_list); -static DEFINE_SPINLOCK(list_lock); -static struct workqueue_struct *rdac_wkqd; - -static inline int had_failures(struct request *req, int error) -{ - return (error || host_byte(req->errors) != DID_OK || - msg_byte(req->errors) != COMMAND_COMPLETE); -} - -static void rdac_resubmit_all(struct rdac_handler *h) -{ - struct rdac_controller *ctlr = h->ctlr; - struct rdac_handler *tmp, *h1; - - spin_lock(&ctlr->lock); - list_for_each_entry_safe(h1, tmp, &ctlr->cmd_list, entry) { - h1->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h1->work); - list_del(&h1->entry); - } - ctlr->submitted = 0; - spin_unlock(&ctlr->lock); -} - -static void mode_select_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct scsi_sense_hdr sense_hdr; - int sense = 0, fail = 0; - - if (had_failures(req, error)) { - fail = 1; - goto failed; - } - - if (status_byte(req->errors) == CHECK_CONDITION) { - scsi_normalize_sense(req->sense, SCSI_SENSE_BUFFERSIZE, - &sense_hdr); - sense = (sense_hdr.sense_key << 16) | (sense_hdr.asc << 8) | - sense_hdr.ascq; - /* If it is retryable failure, submit the c9 inquiry again */ - if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02 || - sense == 0x62900) { - /* 0x59136 - Command lock contention - * 0x[6b]8b02 - Quiesense in progress or achieved - * 0x62900 - Power On, Reset, or Bus Device Reset - */ - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); - goto done; - } - if (sense) - DMINFO("MODE_SELECT failed on %s with sense 0x%x", - h->path->dev->name, sense); - } -failed: - if (fail || sense) - dm_pg_init_complete(h->path, MP_FAIL_PATH); - else - dm_pg_init_complete(h->path, 0); - -done: - rdac_resubmit_all(h); - __blk_put_request(req->q, req); -} - -static struct request *get_rdac_req(struct rdac_handler *h, - void *buffer, unsigned buflen, int rw) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - rq = blk_get_request(q, rw, GFP_KERNEL); - - if (!rq) { - DMINFO("get_rdac_req: blk_get_request failed"); - return NULL; - } - - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_KERNEL)) { - blk_put_request(rq); - DMINFO("get_rdac_req: blk_rq_map_kern failed"); - return NULL; - } - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - - rq->end_io_data = h; - rq->timeout = h->timeout; - rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - return rq; -} - -static struct request *rdac_failover_get(struct rdac_handler *h) -{ - struct request *rq; - struct rdac_mode_common *common; - unsigned data_size; - - if (h->ctlr->use_10_ms) { - struct rdac_pg_expanded *rdac_pg; - - data_size = sizeof(struct rdac_pg_expanded); - rdac_pg = &h->ctlr->mode_select.expanded; - memset(rdac_pg, 0, data_size); - common = &rdac_pg->common; - rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; - rdac_pg->subpage_code = 0x1; - rdac_pg->page_len[0] = 0x01; - rdac_pg->page_len[1] = 0x28; - rdac_pg->lun_table[h->lun] = 0x81; - } else { - struct rdac_pg_legacy *rdac_pg; - - data_size = sizeof(struct rdac_pg_legacy); - rdac_pg = &h->ctlr->mode_select.legacy; - memset(rdac_pg, 0, data_size); - common = &rdac_pg->common; - rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; - rdac_pg->page_len = 0x68; - rdac_pg->lun_table[h->lun] = 0x81; - } - common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS; - common->quiescence_timeout = RDAC_QUIESCENCE_TIME; - common->rdac_options = RDAC_FORCED_QUIESENCE; - - /* get request for block layer packet command */ - rq = get_rdac_req(h, &h->ctlr->mode_select, data_size, WRITE); - if (!rq) { - DMERR("rdac_failover_get: no rq"); - return NULL; - } - - /* Prepare the command. */ - if (h->ctlr->use_10_ms) { - rq->cmd[0] = MODE_SELECT_10; - rq->cmd[7] = data_size >> 8; - rq->cmd[8] = data_size & 0xff; - } else { - rq->cmd[0] = MODE_SELECT; - rq->cmd[4] = data_size; - } - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - return rq; -} - -/* Acquires h->ctlr->lock */ -static void submit_mode_select(struct rdac_handler *h) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - spin_lock(&h->ctlr->lock); - if (h->ctlr->submitted) { - list_add(&h->entry, &h->ctlr->cmd_list); - goto drop_lock; - } - - if (!q) { - DMINFO("submit_mode_select: no queue"); - goto fail_path; - } - - rq = rdac_failover_get(h); - if (!rq) { - DMERR("submit_mode_select: no rq"); - goto fail_path; - } - - DMINFO("queueing MODE_SELECT command on %s", h->path->dev->name); - - blk_execute_rq_nowait(q, NULL, rq, 1, mode_select_endio); - h->ctlr->submitted = 1; - goto drop_lock; -fail_path: - dm_pg_init_complete(h->path, MP_FAIL_PATH); -drop_lock: - spin_unlock(&h->ctlr->lock); -} - -static void release_ctlr(struct kref *kref) -{ - struct rdac_controller *ctlr; - ctlr = container_of(kref, struct rdac_controller, kref); - - spin_lock(&list_lock); - list_del(&ctlr->node); - spin_unlock(&list_lock); - kfree(ctlr); -} - -static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id) -{ - struct rdac_controller *ctlr, *tmp; - - spin_lock(&list_lock); - - list_for_each_entry(tmp, &ctlr_list, node) { - if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) && - (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) { - kref_get(&tmp->kref); - spin_unlock(&list_lock); - return tmp; - } - } - ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC); - if (!ctlr) - goto done; - - /* initialize fields of controller */ - memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN); - memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN); - kref_init(&ctlr->kref); - spin_lock_init(&ctlr->lock); - ctlr->submitted = 0; - ctlr->use_10_ms = -1; - INIT_LIST_HEAD(&ctlr->cmd_list); - list_add(&ctlr->node, &ctlr_list); -done: - spin_unlock(&list_lock); - return ctlr; -} - -static void c4_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c4_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - sp = &h->inq.c4; - - h->ctlr = get_controller(sp->subsys_id, sp->slot_id); - - if (h->ctlr) { - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); - } else - dm_pg_init_complete(h->path, MP_FAIL_PATH); -done: - __blk_put_request(req->q, req); -} - -static void c2_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c2_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - sp = &h->inq.c2; - - /* If more than MODE6_MAX_LUN luns are supported, use mode select 10 */ - if (sp->max_lun_supported >= MODE6_MAX_LUN) - h->ctlr->use_10_ms = 1; - else - h->ctlr->use_10_ms = 0; - - h->cmd_to_send = SEND_MODE_SELECT; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void c9_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c9_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - /* We need to look at the sense keys here to take clear action. - * For now simple logic: If the host is in AVT mode or if controller - * owns the lun, return dm_pg_init_complete(), otherwise submit - * MODE SELECT. - */ - sp = &h->inq.c9; - - /* If in AVT mode, return success */ - if ((sp->avte_cvp >> 7) == 0x1) { - dm_pg_init_complete(h->path, 0); - goto done; - } - - /* If the controller on this path owns the LUN, return success */ - if (sp->avte_cvp & 0x1) { - dm_pg_init_complete(h->path, 0); - goto done; - } - - if (h->ctlr) { - if (h->ctlr->use_10_ms == -1) - h->cmd_to_send = SEND_C2_INQUIRY; - else - h->cmd_to_send = SEND_MODE_SELECT; - } else - h->cmd_to_send = SEND_C4_INQUIRY; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void c8_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c8_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - /* We need to look at the sense keys here to take clear action. - * For now simple logic: Get the lun from the inquiry page. - */ - sp = &h->inq.c8; - h->lun = sp->lun[7]; /* currently it uses only one byte */ - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void submit_inquiry(struct rdac_handler *h, int page_code, - unsigned int len, rq_end_io_fn endio) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - if (!q) - goto fail_path; - - rq = get_rdac_req(h, &h->inq, len, READ); - if (!rq) - goto fail_path; - - /* Prepare the command. */ - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = page_code; - rq->cmd[4] = len; - rq->cmd_len = COMMAND_SIZE(INQUIRY); - blk_execute_rq_nowait(q, NULL, rq, 1, endio); - return; - -fail_path: - dm_pg_init_complete(h->path, MP_FAIL_PATH); -} - -static void service_wkq(struct work_struct *work) -{ - struct rdac_handler *h = container_of(work, struct rdac_handler, work); - - switch (h->cmd_to_send) { - case SEND_C2_INQUIRY: - submit_inquiry(h, 0xC2, sizeof(struct c2_inquiry), c2_endio); - break; - case SEND_C4_INQUIRY: - submit_inquiry(h, 0xC4, sizeof(struct c4_inquiry), c4_endio); - break; - case SEND_C8_INQUIRY: - submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); - break; - case SEND_C9_INQUIRY: - submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); - break; - case SEND_MODE_SELECT: - submit_mode_select(h); - break; - default: - BUG(); - } -} -/* - * only support subpage2c until we confirm that this is just a matter of - * of updating firmware or not, and RDAC (basic AVT works already) for now - * but we can add these in in when we get time and testers - */ -static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct rdac_handler *h; - unsigned timeout; - - if (argc == 0) { - /* No arguments: use defaults */ - timeout = RDAC_FAILOVER_TIMEOUT; - } else if (argc != 1) { - DMWARN("incorrect number of arguments"); - return -EINVAL; - } else { - if (sscanf(argv[1], "%u", &timeout) != 1) { - DMWARN("invalid timeout value"); - return -EINVAL; - } - } - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - hwh->context = h; - h->timeout = timeout; - h->lun = UNINITIALIZED_LUN; - INIT_WORK(&h->work, service_wkq); - DMWARN("using RDAC command with timeout %u", h->timeout); - - return 0; -} - -static void rdac_destroy(struct hw_handler *hwh) -{ - struct rdac_handler *h = hwh->context; - - if (h->ctlr) - kref_put(&h->ctlr->kref, release_ctlr); - kfree(h); - hwh->context = NULL; -} - -static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio) -{ - /* Try default handler */ - return dm_scsi_err_handler(hwh, bio); -} - -static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct rdac_handler *h = hwh->context; - - h->path = path; - switch (h->lun) { - case UNINITIALIZED_LUN: - submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); - break; - default: - submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); - } -} - -static struct hw_handler_type rdac_handler = { - .name = RDAC_DM_HWH_NAME, - .module = THIS_MODULE, - .create = rdac_create, - .destroy = rdac_destroy, - .pg_init = rdac_pg_init, - .error = rdac_error, -}; - -static int __init rdac_init(void) -{ - int r; - - rdac_wkqd = create_singlethread_workqueue("rdac_wkqd"); - if (!rdac_wkqd) { - DMERR("Failed to create workqueue rdac_wkqd."); - return -ENOMEM; - } - - r = dm_register_hw_handler(&rdac_handler); - if (r < 0) { - DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); - destroy_workqueue(rdac_wkqd); - return r; - } - - DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); - return 0; -} - -static void __exit rdac_exit(void) -{ - int r = dm_unregister_hw_handler(&rdac_handler); - - destroy_workqueue(rdac_wkqd); - if (r < 0) - DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r); -} - -module_init(rdac_init); -module_exit(rdac_exit); - -MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support"); -MODULE_AUTHOR("Mike Christie, Chandra Seetharaman"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(RDAC_DM_HWH_VER); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e7ee59e655d..9f7302d4878 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -7,7 +7,6 @@ #include "dm.h" #include "dm-path-selector.h" -#include "dm-hw-handler.h" #include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" @@ -20,6 +19,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/workqueue.h> +#include <scsi/scsi_dh.h> #include <asm/atomic.h> #define DM_MSG_PREFIX "multipath" @@ -61,7 +61,8 @@ struct multipath { spinlock_t lock; - struct hw_handler hw_handler; + const char *hw_handler_name; + struct work_struct activate_path; unsigned nr_priority_groups; struct list_head priority_groups; unsigned pg_init_required; /* pg_init needs calling? */ @@ -106,9 +107,10 @@ typedef int (*action_fn) (struct pgpath *pgpath); static struct kmem_cache *_mpio_cache; -static struct workqueue_struct *kmultipathd; +static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); +static void activate_path(struct work_struct *work); /*----------------------------------------------- @@ -178,6 +180,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios); INIT_WORK(&m->trigger_event, trigger_event); + INIT_WORK(&m->activate_path, activate_path); m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { kfree(m); @@ -193,18 +196,13 @@ static struct multipath *alloc_multipath(struct dm_target *ti) static void free_multipath(struct multipath *m) { struct priority_group *pg, *tmp; - struct hw_handler *hwh = &m->hw_handler; list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { list_del(&pg->list); free_priority_group(pg, m->ti); } - if (hwh->type) { - hwh->type->destroy(hwh); - dm_put_hw_handler(hwh->type); - } - + kfree(m->hw_handler_name); mempool_destroy(m->mpio_pool); kfree(m); } @@ -216,12 +214,10 @@ static void free_multipath(struct multipath *m) static void __switch_pg(struct multipath *m, struct pgpath *pgpath) { - struct hw_handler *hwh = &m->hw_handler; - m->current_pg = pgpath->pg; /* Must we initialise the PG first, and queue I/O till it's ready? */ - if (hwh->type && hwh->type->pg_init) { + if (m->hw_handler_name) { m->pg_init_required = 1; m->queue_io = 1; } else { @@ -409,7 +405,6 @@ static void process_queued_ios(struct work_struct *work) { struct multipath *m = container_of(work, struct multipath, process_queued_ios); - struct hw_handler *hwh = &m->hw_handler; struct pgpath *pgpath = NULL; unsigned init_required = 0, must_queue = 1; unsigned long flags; @@ -439,7 +434,7 @@ out: spin_unlock_irqrestore(&m->lock, flags); if (init_required) - hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path); + queue_work(kmpath_handlerd, &m->activate_path); if (!must_queue) dispatch_queued_ios(m); @@ -652,8 +647,6 @@ static struct priority_group *parse_priority_group(struct arg_set *as, static int parse_hw_handler(struct arg_set *as, struct multipath *m) { - int r; - struct hw_handler_type *hwht; unsigned hw_argc; struct dm_target *ti = m->ti; @@ -661,30 +654,20 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) {0, 1024, "invalid number of hardware handler args"}, }; - r = read_param(_params, shift(as), &hw_argc, &ti->error); - if (r) + if (read_param(_params, shift(as), &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - hwht = dm_get_hw_handler(shift(as)); - if (!hwht) { + m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); + request_module("scsi_dh_%s", m->hw_handler_name); + if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; + kfree(m->hw_handler_name); + m->hw_handler_name = NULL; return -EINVAL; } - - m->hw_handler.md = dm_table_get_md(ti->table); - dm_put(m->hw_handler.md); - - r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); - if (r) { - dm_put_hw_handler(hwht); - ti->error = "hardware handler constructor failed"; - return r; - } - - m->hw_handler.type = hwht; consume(as, hw_argc - 1); return 0; @@ -808,6 +791,7 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = (struct multipath *) ti->private; + flush_workqueue(kmpath_handlerd); flush_workqueue(kmultipathd); free_multipath(m); } @@ -1025,52 +1009,85 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) return limit_reached; } -/* - * pg_init must call this when it has completed its initialisation - */ -void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) +static void pg_init_done(struct dm_path *path, int errors) { struct pgpath *pgpath = path_to_pgpath(path); struct priority_group *pg = pgpath->pg; struct multipath *m = pg->m; unsigned long flags; - /* - * If requested, retry pg_init until maximum number of retries exceeded. - * If retry not requested and PG already bypassed, always fail the path. - */ - if (err_flags & MP_RETRY) { - if (pg_init_limit_reached(m, pgpath)) - err_flags |= MP_FAIL_PATH; - } else if (err_flags && pg->bypassed) - err_flags |= MP_FAIL_PATH; - - if (err_flags & MP_FAIL_PATH) + /* device or driver problems */ + switch (errors) { + case SCSI_DH_OK: + break; + case SCSI_DH_NOSYS: + if (!m->hw_handler_name) { + errors = 0; + break; + } + DMERR("Cannot failover device because scsi_dh_%s was not " + "loaded.", m->hw_handler_name); + /* + * Fail path for now, so we do not ping pong + */ fail_path(pgpath); - - if (err_flags & MP_BYPASS_PG) + break; + case SCSI_DH_DEV_TEMP_BUSY: + /* + * Probably doing something like FW upgrade on the + * controller so try the other pg. + */ bypass_pg(m, pg, 1); + break; + /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */ + case SCSI_DH_RETRY: + case SCSI_DH_IMM_RETRY: + case SCSI_DH_RES_TEMP_UNAVAIL: + if (pg_init_limit_reached(m, pgpath)) + fail_path(pgpath); + errors = 0; + break; + default: + /* + * We probably do not want to fail the path for a device + * error, but this is what the old dm did. In future + * patches we can do more advanced handling. + */ + fail_path(pgpath); + } spin_lock_irqsave(&m->lock, flags); - if (err_flags & ~MP_RETRY) { + if (errors) { + DMERR("Could not failover device. Error %d.", errors); m->current_pgpath = NULL; m->current_pg = NULL; - } else if (!m->pg_init_required) + } else if (!m->pg_init_required) { m->queue_io = 0; + pg->bypassed = 0; + } m->pg_init_in_progress = 0; queue_work(kmultipathd, &m->process_queued_ios); spin_unlock_irqrestore(&m->lock, flags); } +static void activate_path(struct work_struct *work) +{ + int ret; + struct multipath *m = + container_of(work, struct multipath, activate_path); + struct dm_path *path = &m->current_pgpath->path; + + ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev)); + pg_init_done(path, ret); +} + /* * end_io handling */ static int do_end_io(struct multipath *m, struct bio *bio, int error, struct dm_mpath_io *mpio) { - struct hw_handler *hwh = &m->hw_handler; - unsigned err_flags = MP_FAIL_PATH; /* Default behavior */ unsigned long flags; if (!error) @@ -1097,19 +1114,8 @@ static int do_end_io(struct multipath *m, struct bio *bio, } spin_unlock_irqrestore(&m->lock, flags); - if (hwh->type && hwh->type->error) - err_flags = hwh->type->error(hwh, bio); - - if (mpio->pgpath) { - if (err_flags & MP_FAIL_PATH) - fail_path(mpio->pgpath); - - if (err_flags & MP_BYPASS_PG) - bypass_pg(m, mpio->pgpath->pg, 1); - } - - if (err_flags & MP_ERROR_IO) - return -EIO; + if (mpio->pgpath) + fail_path(mpio->pgpath); requeue: dm_bio_restore(&mpio->details, bio); @@ -1194,7 +1200,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type, int sz = 0; unsigned long flags; struct multipath *m = (struct multipath *) ti->private; - struct hw_handler *hwh = &m->hw_handler; struct priority_group *pg; struct pgpath *p; unsigned pg_num; @@ -1214,12 +1219,10 @@ static int multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("pg_init_retries %u ", m->pg_init_retries); } - if (hwh->type && hwh->type->status) - sz += hwh->type->status(hwh, type, result + sz, maxlen - sz); - else if (!hwh->type || type == STATUSTYPE_INFO) + if (!m->hw_handler_name || type == STATUSTYPE_INFO) DMEMIT("0 "); else - DMEMIT("1 %s ", hwh->type->name); + DMEMIT("1 %s ", m->hw_handler_name); DMEMIT("%u ", m->nr_priority_groups); @@ -1422,6 +1425,21 @@ static int __init dm_multipath_init(void) return -ENOMEM; } + /* + * A separate workqueue is used to handle the device handlers + * to avoid overloading existing workqueue. Overloading the + * old workqueue would also create a bottleneck in the + * path of the storage hardware device activation. + */ + kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd"); + if (!kmpath_handlerd) { + DMERR("failed to create workqueue kmpath_handlerd"); + destroy_workqueue(kmultipathd); + dm_unregister_target(&multipath_target); + kmem_cache_destroy(_mpio_cache); + return -ENOMEM; + } + DMINFO("version %u.%u.%u loaded", multipath_target.version[0], multipath_target.version[1], multipath_target.version[2]); @@ -1433,6 +1451,7 @@ static void __exit dm_multipath_exit(void) { int r; + destroy_workqueue(kmpath_handlerd); destroy_workqueue(kmultipathd); r = dm_unregister_target(&multipath_target); @@ -1441,8 +1460,6 @@ static void __exit dm_multipath_exit(void) kmem_cache_destroy(_mpio_cache); } -EXPORT_SYMBOL_GPL(dm_pg_init_complete); - module_init(dm_multipath_init); module_exit(dm_multipath_exit); diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h index b9cdcbb3ed5..c198b856a45 100644 --- a/drivers/md/dm-mpath.h +++ b/drivers/md/dm-mpath.h @@ -16,7 +16,6 @@ struct dm_path { unsigned is_active; /* Read-only */ void *pscontext; /* For path-selector use */ - void *hwhcontext; /* For hw-handler use */ }; /* Callback for hwh_pg_init_fn to use when complete */ diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 10748240cb2..6a866d7c8ae 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) /** * linear_mergeable_bvec -- tell bio layer if two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can take at this offset */ -static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int linear_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; dev_info_t *dev0; - unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); dev0 = which_dev(mddev, sector); maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 914c04ddec7..bcbb82594a1 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev) /** * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can accept at this offset */ -static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int raid0_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a71277b640a..22bb2b1b886 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -439,26 +439,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) /** * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can accept at this offset * If near_copies == raid_disk, there are no striping issues, * but in that case, the function isn't called at all. */ -static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio, - struct bio_vec *bio_vec) +static int raid10_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ - if (max <= bio_vec->bv_len && bio_sectors == 0) - return bio_vec->bv_len; + if (max <= biovec->bv_len && bio_sectors == 0) + return biovec->bv_len; else return max; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 54c8ee28fcc..9ce7154845c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2017,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, */ s->uptodate++; return 0; /* uptodate + compute == disks */ - } else if ((s->uptodate < disks - 1) && - test_bit(R5_Insync, &dev->flags)) { - /* Note: we hold off compute operations while checks are - * in flight, but we still prefer 'compute' over 'read' - * hence we only read if (uptodate < * disks-1) - */ + } else if (test_bit(R5_Insync, &dev->flags)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) @@ -3319,15 +3314,17 @@ static int raid5_congested(void *data, int bits) /* We want read requests to align with chunks where possible, * but write requests don't need to. */ -static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int raid5_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; - if (bio_data_dir(bio) == WRITE) + if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; |