Diffstat (limited to 'drivers/block')
40 files changed, 1370 insertions, 1311 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 748dea4f34d..758da2287d9 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1406,7 +1406,7 @@ next_segment:
 		track = block / (floppy->dtype->sects * floppy->type->sect_mult);
 		sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
-		data = rq->buffer + 512 * cnt;
+		data = bio_data(rq->bio) + 512 * cnt;
 #ifdef DEBUG
 		printk("access to track %d, sector %d, with buffer at "
 		       "0x%08lx\n", track, sector, data);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 96b629e1f0c..2104b1b4ccd 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1484,7 +1484,7 @@ repeat:
 	ReqCnt = 0;
 	ReqCmd = rq_data_dir(fd_request);
 	ReqBlock = blk_rq_pos(fd_request);
-	ReqBuffer = fd_request->buffer;
+	ReqBuffer = bio_data(fd_request->bio);
 	setup_req_params( drive );
 	do_fd_action( drive );
@@ -1952,7 +1952,7 @@ static int __init atari_floppy_init (void)
 		goto Enomem;
 	}
 	TrackBuffer = DMABuffer + 512;
-	PhysDMABuffer = virt_to_phys(DMABuffer);
+	PhysDMABuffer = atari_stram_to_phys(DMABuffer);
 	PhysTrackBuffer = virt_to_phys(TrackBuffer);
 	BufferDrive = BufferSide = BufferTrack = -1;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index e73b85cf075..c7d138eca73 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -200,11 +200,11 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
 	if (!brd_insert_page(brd, sector))
-		return -ENOMEM;
+		return -ENOSPC;
 	if (copy < n) {
 		sector += copy >> SECTOR_SHIFT;
 		if (!brd_insert_page(brd, sector))
-			return -ENOMEM;
+			return -ENOSPC;
 	}
 	return 0;
 }
@@ -360,6 +360,15 @@ out:
 	bio_endio(bio, err);
 }
+static int brd_rw_page(struct block_device *bdev, sector_t sector,
+		       struct page *page, int rw)
+{
+	struct brd_device *brd = bdev->bd_disk->private_data;
+	int err = brd_do_bvec(brd, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	page_endio(page, rw & WRITE, err);
+	return err;
+}
+
 #ifdef CONFIG_BLK_DEV_XIP
 static int brd_direct_access(struct block_device *bdev, sector_t sector,
 			void **kaddr, unsigned long *pfn)
@@ -375,7 +384,7 @@ static int brd_direct_access(struct block_device *bdev, sector_t sector,
 		return -ERANGE;
 	page = brd_insert_page(brd, sector);
 	if (!page)
-		return -ENOMEM;
+		return -ENOSPC;
 	*kaddr = page_address(page);
 	*pfn = page_to_pfn(page);
@@ -419,6 +428,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
 static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
+	.rw_page =		brd_rw_page,
 	.ioctl =		brd_ioctl,
 #ifdef CONFIG_BLK_DEV_XIP
 	.direct_access =	brd_direct_access,
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 73894ca3395..4595c22f33f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4080,7 +4080,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 		goto default_int_mode;
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-		err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
+		err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4);
 		if (!err) {
 			h->intr[0] = cciss_msix_entries[0].vector;
 			h->intr[1] = cciss_msix_entries[1].vector;
@@ -4088,10 +4088,6 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 			h->intr[3] = cciss_msix_entries[3].vector;
 			h->msix_vector = 1;
 			return;
-		}
-		if (err > 0) {
-			dev_warn(&h->pdev->dev,
-				 "only %d MSI-X vectors available\n", err);
 		} else {
 			dev_warn(&h->pdev->dev,
 				 "MSI-X init failed %d\n", err);
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 90ae4ba8f9e..05a1780ffa8 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -29,7 +29,6 @@
 #include <linux/drbd_limits.h>
 #include <linux/dynamic_debug.h>
 #include "drbd_int.h"
-#include "drbd_wrappers.h"
 enum al_transaction_types {
@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
 	BUG_ON(!bdev->md_bdev);
-	drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
+	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
 	     current->comm, current->pid, __func__,
 	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
 	     (void*)_RET_IP_ );
@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
 	return _al_get(device, first, true);
 }
-static
 bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
 {
 	/* for bios crossing activity log extent boundaries,
@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
 	int wake_up = 0;
 	unsigned long flags;
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
 	if (size == 0)
 		return 0;
-	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
 		return 0;
@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
 	unsigned int enr = BM_SECT_TO_EXT(sector);
 	struct bm_extent *bm_ext;
 	int i, sig;
-	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
-			 200 times -> 20 seconds. */
+	bool sa;
 retry:
 	sig = wait_event_interruptible(device->al_wait,
@@ -1035,12 +1032,15 @@ retry:
 	if (test_bit(BME_LOCKED, &bm_ext->flags))
 		return 0;
+	/* step aside only while we are above c-min-rate; unless disabled. */
+	sa = drbd_rs_c_min_rate_throttle(device);
+
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 		sig = wait_event_interruptible(device->al_wait,
 					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
-					       test_bit(BME_PRIORITY, &bm_ext->flags));
+					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
-		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
 			spin_lock_irq(&device->al_lock);
 			if (lc_put(device->resync, &bm_ext->lce) == 0) {
 				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
@@ -1052,9 +1052,6 @@ retry:
 			return -EINTR;
 		if (schedule_timeout_interruptible(HZ/10))
 			return -EINTR;
-		if (sa && --sa == 0)
-			drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
-				 "Resync stalled?\n");
 		goto retry;
 	}
 }
@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
 	sector_t esector, nr_sectors;
 	int wake_up = 0;
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e7093d4291f..a76ceb344d6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -382,6 +382,12 @@ enum {
 	__EE_CALL_AL_COMPLETE_IO,
 	__EE_MAY_SET_IN_SYNC,
+	/* is this a TRIM aka REQ_DISCARD? */
+	__EE_IS_TRIM,
+	/* our lower level cannot handle trim,
+	 * and we want to fall back to zeroout instead */
+	__EE_IS_TRIM_USE_ZEROOUT,
+
 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
 	__EE_RESUBMITTED,
@@ -405,7 +411,9 @@ enum {
 };
 #define EE_CALL_AL_COMPLETE_IO	(1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC	(1<<__EE_MAY_SET_IN_SYNC)
-#define EE_RESUBMITTED		(1<<__EE_RESUBMITTED)
+#define EE_IS_TRIM		(1<<__EE_IS_TRIM)
+#define EE_IS_TRIM_USE_ZEROOUT	(1<<__EE_IS_TRIM_USE_ZEROOUT)
+#define EE_RESUBMITTED		(1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR		(1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST		(1<<__EE_HAS_DIGEST)
 #define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
@@ -579,6 +587,7 @@ struct drbd_resource {
 	struct list_head resources;
 	struct res_opts res_opts;
 	struct mutex conf_update;	/* mutex for ready-copy-update of net_conf and disk_conf */
+	struct mutex adm_mutex;		/* mutex to serialize administrative requests */
 	spinlock_t req_lock;
 	unsigned susp:1;		/* IO suspended by user */
@@ -609,6 +618,7 @@ struct drbd_connection {
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 	int agreed_pro_version;		/* actually used protocol version */
+	u32 agreed_features;
 	unsigned long last_received;	/* in jiffies, either socket */
 	unsigned int ko_count;
@@ -814,6 +824,28 @@ struct drbd_device {
 	struct submit_worker submit;
 };
+struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED	(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
+
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_device *device;
+	struct drbd_resource *resource;
+	struct drbd_connection *connection;
+};
+
 static inline struct drbd_device *minor_to_device(unsigned int minor)
 {
 	return (struct drbd_device *)idr_find(&drbd_devices, minor);
@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor)
 static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
 {
-	return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+	return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
 }
 #define for_each_resource(resource, _resources) \
@@ -1139,6 +1171,12 @@ struct bm_extent {
 #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
 #define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
+/* For now, don't allow more than one activity log extent worth of data
+ * to be discarded in one go. We may need to rework drbd_al_begin_io()
+ * to allow for even larger discard ranges */
+#define DRBD_MAX_DISCARD_SIZE	AL_EXTENT_SIZE
+#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+
 extern int  drbd_bm_init(struct drbd_device *device);
 extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
 extern void drbd_bm_cleanup(struct drbd_device *device);
@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 extern rwlock_t global_state_lock;
 extern int conn_lowest_minor(struct drbd_connection *connection);
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr);
+extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
 extern void drbd_destroy_device(struct kref *kref);
-extern void drbd_delete_device(struct drbd_device *mdev);
+extern void drbd_delete_device(struct drbd_device *device);
 extern struct drbd_resource *drbd_create_resource(const char *name);
 extern void drbd_free_resource(struct drbd_resource *resource);
@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 /* drbd_nl.c */
-extern int drbd_msg_put_info(const char *info);
+extern int drbd_msg_put_info(struct sk_buff *skb, const char *info);
 extern void drbd_suspend_io(struct drbd_device *device);
 extern void drbd_resume_io(struct drbd_device *device);
 extern char *ppsize(char *buf, unsigned long long size);
@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
 extern int drbd_khelper(struct drbd_device *device, char *cmd);
 /* drbd_worker.c */
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
 extern int drbd_worker(struct drbd_thread *thi);
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
 void drbd_resync_after_changed(struct drbd_device *device);
@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int);
 extern void resync_timer_fn(unsigned long data);
 extern void start_resync_timer_fn(unsigned long data);
+extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
+
 /* drbd_receiver.c */
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_asender(struct drbd_thread *thi);
-extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
 extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, const int);
 extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
 extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int,
+						     bool,
						     gfp_t) __must_hold(local);
 extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, int);
@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock)
			(char*)&val, sizeof(val));
 }
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_device *device,
+					sector_t size)
+{
+	/* set_capacity(device->this_bdev->bd_disk, size); */
+	set_capacity(device->vdisk, size);
+	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_device *device,
+					     int fault_type, struct bio *bio)
+{
+	__release(local);
+	if (!bio->bi_bdev) {
+		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+				"bio->bi_bdev == NULL\n",
+		       device_to_minor(device));
+		dump_stack();
+		bio_endio(bio, -ENODEV);
+		return;
+	}
+
+	if (drbd_insert_fault(device, fault_type))
+		bio_endio(bio, -EIO);
+	else
+		generic_make_request(bio);
+}
+
 void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
 /* drbd_proc.c */
@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
 extern const char *drbd_role_str(enum drbd_role s);
 /* drbd_actlog.c */
+extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
 extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
 extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
 extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device)
 static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
 {
-	return list_first_entry(&resource->connections,
+	return list_first_entry_or_null(&resource->connections,
				struct drbd_connection, connections);
 }
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 331e5cc1227..960645c26e6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long b
 		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
-/* Used to send write requests
- * R_PRIMARY -> Peer	(P_DATA)
+/* Used to send write or TRIM aka REQ_DISCARD requests
+ * R_PRIMARY -> Peer	(P_DATA, P_TRIM)
  */
 int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
 {
@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
			dp_flags |= DP_SEND_WRITE_ACK;
 	}
 	p->dp_flags = cpu_to_be32(dp_flags);
+
+	if (dp_flags & DP_DISCARD) {
+		struct p_trim *t = (struct p_trim*)p;
+		t->size = cpu_to_be32(req->i.size);
+		err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+		goto out;
+	}
+
+	/* our digest is still only over the payload.
+	 * TRIM does not carry any payload. */
 	if (dgs)
 		drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
 	err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
		     ... Be noisy about digest too large ...
	} */
 	}
+out:
 	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */
 	return err;
@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
 	INIT_LIST_HEAD(&resource->connections);
 	list_add_tail_rcu(&resource->resources, &drbd_resources);
 	mutex_init(&resource->conf_update);
+	mutex_init(&resource->adm_mutex);
 	spin_lock_init(&resource->req_lock);
 	return resource;
@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device)
 	return 0;
 }
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
+enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
 {
+	struct drbd_resource *resource = adm_ctx->resource;
 	struct drbd_connection *connection;
 	struct drbd_device *device;
 	struct drbd_peer_device *peer_device, *tmp_peer_device;
 	struct gendisk *disk;
 	struct request_queue *q;
 	int id;
+	int vnr = adm_ctx->volume;
 	enum drbd_ret_code err = ERR_NOMEM;
 	device = minor_to_device(minor);
@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 	if (id < 0) {
 		if (id == -ENOSPC) {
 			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
 		}
 		goto out_no_minor_idr;
 	}
@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 	if (id < 0) {
 		if (id == -ENOSPC) {
 			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
 		}
 		goto out_idr_remove_minor;
 	}
@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 		if (id < 0) {
 			if (id == -ENOSPC) {
				err = ERR_INVALID_REQUEST;
-				drbd_msg_put_info("requested volume exists already");
+				drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already");
			}
			goto out_idr_remove_from_resource;
		}
@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
 	if (init_submitter(device)) {
 		err = ERR_NOMEM;
-		drbd_msg_put_info("unable to create submit workqueue");
+		drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue");
 		goto out_idr_remove_vol;
 	}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 526414bc2ca..1b35c45c92b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -34,7 +34,6 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
-#include "drbd_wrappers.h"
 #include <asm/unaligned.h>
 #include <linux/drbd_limits.h>
 #include <linux/kthread.h>
@@ -82,32 +81,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
 /* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
-/* Configuration is strictly serialized, because generic netlink message
- * processing is strictly serialized by the genl_lock().
- * Which means we can use one static global drbd_config_context struct.
- */
-static struct drbd_config_context {
-	/* assigned from drbd_genlmsghdr */
-	unsigned int minor;
-	/* assigned from request attributes, if present */
-	unsigned int volume;
-#define VOLUME_UNSPECIFIED	(-1U)
-	/* pointer into the request skb,
-	 * limited lifetime! */
-	char *resource_name;
-	struct nlattr *my_addr;
-	struct nlattr *peer_addr;
-
-	/* reply buffer */
-	struct sk_buff *reply_skb;
-	/* pointer into reply buffer */
-	struct drbd_genlmsghdr *reply_dh;
-	/* resolved from attributes, if possible */
-	struct drbd_device *device;
-	struct drbd_resource *resource;
-	struct drbd_connection *connection;
-} adm_ctx;
-
 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 {
 	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
@@ -117,9 +90,8 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
  * reason it could fail was no space in skb, and there are 4k available. */
-int drbd_msg_put_info(const char *info)
+int drbd_msg_put_info(struct sk_buff *skb, const char *info)
 {
-	struct sk_buff *skb = adm_ctx.reply_skb;
 	struct nlattr *nla;
 	int err = -EMSGSIZE;
@@ -143,42 +115,46 @@ int drbd_msg_put_info(const char *info)
  * and per-family private info->pointers.
  * But we need to stay compatible with older kernels.
  * If it returns successfully, adm_ctx members are valid.
+ *
+ * At this point, we still rely on the global genl_lock().
+ * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
+ * to add additional synchronization against object destruction/modification.
  */
 #define DRBD_ADM_NEED_MINOR	1
 #define DRBD_ADM_NEED_RESOURCE	2
 #define DRBD_ADM_NEED_CONNECTION 4
-static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
-		unsigned flags)
+static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
+	struct sk_buff *skb, struct genl_info *info, unsigned flags)
 {
 	struct drbd_genlmsghdr *d_in = info->userhdr;
 	const u8 cmd = info->genlhdr->cmd;
 	int err;
-	memset(&adm_ctx, 0, sizeof(adm_ctx));
+	memset(adm_ctx, 0, sizeof(*adm_ctx));
 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 		return -EPERM;
-	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!adm_ctx.reply_skb) {
+	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!adm_ctx->reply_skb) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
					info, &drbd_genl_family, 0, cmd);
 	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
	 * but anyways */
-	if (!adm_ctx.reply_dh) {
+	if (!adm_ctx->reply_dh) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	adm_ctx.reply_dh->minor = d_in->minor;
-	adm_ctx.reply_dh->ret_code = NO_ERROR;
+	adm_ctx->reply_dh->minor = d_in->minor;
+	adm_ctx->reply_dh->ret_code = NO_ERROR;
-	adm_ctx.volume = VOLUME_UNSPECIFIED;
+	adm_ctx->volume = VOLUME_UNSPECIFIED;
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
@@ -188,111 +164,131 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		/* It was present, and valid,
 		 * copy it over to the reply skb. */
-		err = nla_put_nohdr(adm_ctx.reply_skb,
+		err = nla_put_nohdr(adm_ctx->reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
 		if (err)
 			goto fail;
-		/* and assign stuff to the global adm_ctx */
+		/* and assign stuff to the adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 		if (nla)
-			adm_ctx.volume = nla_get_u32(nla);
+			adm_ctx->volume = nla_get_u32(nla);
 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
-			adm_ctx.resource_name = nla_data(nla);
-		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
-		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
-		if ((adm_ctx.my_addr &&
-		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
-		    (adm_ctx.peer_addr &&
-		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
+			adm_ctx->resource_name = nla_data(nla);
+		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		if ((adm_ctx->my_addr &&
+		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
+		    (adm_ctx->peer_addr &&
+		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
 			err = -EINVAL;
 			goto fail;
 		}
 	}
-	adm_ctx.minor = d_in->minor;
-	adm_ctx.device = minor_to_device(d_in->minor);
-	if (adm_ctx.resource_name) {
-		adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
+	adm_ctx->minor = d_in->minor;
+	adm_ctx->device = minor_to_device(d_in->minor);
+
+	/* We are protected by the global genl_lock().
+	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
+	 * so make sure this object stays around. */
+	if (adm_ctx->device)
+		kref_get(&adm_ctx->device->kref);
+
+	if (adm_ctx->resource_name) {
+		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
 	}
-	if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
-		drbd_msg_put_info("unknown minor");
+	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
 		return ERR_MINOR_INVALID;
 	}
-	if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
-		drbd_msg_put_info("unknown resource");
-		if (adm_ctx.resource_name)
+	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
+		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
+		if (adm_ctx->resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;
	}
	if (flags & DRBD_ADM_NEED_CONNECTION) {
-		if (adm_ctx.resource) {
-			drbd_msg_put_info("no resource name expected");
+		if (adm_ctx->resource) {
+			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
			return ERR_INVALID_REQUEST;
		}
-		if (adm_ctx.device) {
-			drbd_msg_put_info("no minor number expected");
+		if (adm_ctx->device) {
+			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
			return ERR_INVALID_REQUEST;
		}
-		if (adm_ctx.my_addr && adm_ctx.peer_addr)
-			adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),