diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 14:35:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 14:35:28 -0700 |
commit | cc8362b1f6d724e46f515121d442779924b19fec (patch) | |
tree | 86fb5c3767e538ec9ded57dd7b3ce5d69dcde691 | |
parent | 2e3ee613480563a6d5c01b57d342e65cc58c06df (diff) | |
parent | 1fe5e9932156f6122c3b1ff6ba7541c27c86718c (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph changes from Sage Weil:
"Lots of stuff this time around:
- lots of cleanup and refactoring in the libceph messenger code, and
many hard to hit races and bugs closed as a result.
- lots of cleanup and refactoring in the rbd code from Alex Elder,
mostly in preparation for the layering functionality that will be
coming in 3.7.
- some misc rbd cleanups from Josh Durgin that are finally going
upstream
- support for CRUSH tunables (used by newer clusters to improve the
data placement)
- some cleanup in our use of d_parent that Al brought up a while back
- a random collection of fixes across the tree
There is another patch coming that fixes up our ->atomic_open()
behavior, but I'm going to hammer on it a bit more before sending it."
Fix up conflicts due to commits that were already committed earlier in
drivers/block/rbd.c, net/ceph/{messenger.c, osd_client.c}
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (132 commits)
rbd: create rbd_refresh_helper()
rbd: return obj version in __rbd_refresh_header()
rbd: fixes in rbd_header_from_disk()
rbd: always pass ops array to rbd_req_sync_op()
rbd: pass null version pointer in add_snap()
rbd: make rbd_create_rw_ops() return a pointer
rbd: have __rbd_add_snap_dev() return a pointer
libceph: recheck con state after allocating incoming message
libceph: change ceph_con_in_msg_alloc convention to be less weird
libceph: avoid dropping con mutex before fault
libceph: verify state after retaking con lock after dispatch
libceph: revoke mon_client messages on session restart
libceph: fix handling of immediate socket connect failure
ceph: update MAINTAINERS file
libceph: be less chatty about stray replies
libceph: clear all flags on con_close
libceph: clean up con flags
libceph: replace connection state bits with states
libceph: drop unnecessary CLOSED check in socket state change callback
libceph: close socket directly from ceph_con_close()
...
-rw-r--r-- | Documentation/ABI/testing/sysfs-bus-rbd | 10 | ||||
-rw-r--r-- | MAINTAINERS | 13 | ||||
-rw-r--r-- | drivers/block/rbd.c | 816 | ||||
-rw-r--r-- | drivers/block/rbd_types.h | 1 | ||||
-rw-r--r-- | fs/ceph/dir.c | 7 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 23 | ||||
-rw-r--r-- | fs/ceph/snap.c | 18 | ||||
-rw-r--r-- | fs/ceph/super.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 1 | ||||
-rw-r--r-- | include/linux/ceph/ceph_features.h | 27 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 14 | ||||
-rw-r--r-- | include/linux/ceph/decode.h | 49 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 10 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 60 | ||||
-rw-r--r-- | include/linux/ceph/mon_client.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/msgpool.h | 3 | ||||
-rw-r--r-- | include/linux/crush/crush.h | 8 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 25 | ||||
-rw-r--r-- | net/ceph/crush/mapper.c | 13 | ||||
-rw-r--r-- | net/ceph/messenger.c | 925 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 76 | ||||
-rw-r--r-- | net/ceph/msgpool.c | 7 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 77 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 59 |
25 files changed, 1351 insertions, 898 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd index bcd88eb7ebc..3c17b62899f 100644 --- a/Documentation/ABI/testing/sysfs-bus-rbd +++ b/Documentation/ABI/testing/sysfs-bus-rbd @@ -35,8 +35,14 @@ name pool - The pool where this rbd image resides. The pool-name pair is unique - per rados system. + The name of the storage pool where this rbd image resides. + An rbd image name is unique within its pool. + +pool_id + + The unique identifier for the rbd image's pool. This is + a permanent attribute of the pool. A pool's id will never + change. size diff --git a/MAINTAINERS b/MAINTAINERS index fb036a062a5..5b44872b64e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1789,15 +1789,16 @@ F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ CEPH DISTRIBUTED FILE SYSTEM CLIENT -M: Sage Weil <sage@newdream.net> +M: Sage Weil <sage@inktank.com> L: ceph-devel@vger.kernel.org -W: http://ceph.newdream.net/ +W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph F: net/ceph F: include/linux/ceph +F: include/linux/crush CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org @@ -5639,10 +5640,12 @@ S: Supported F: arch/hexagon/ RADOS BLOCK DEVICE (RBD) -F: include/linux/qnxtypes.h -M: Yehuda Sadeh <yehuda@hq.newdream.net> -M: Sage Weil <sage@newdream.net> +M: Yehuda Sadeh <yehuda@inktank.com> +M: Sage Weil <sage@inktank.com> +M: Alex Elder <elder@inktank.com> M: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported F: drivers/block/rbd.c F: drivers/block/rbd_types.h diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8f428a8ab00..9917943a357 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -55,8 +55,6 @@ #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) -#define RBD_MAX_POOL_NAME_LEN 64 #define RBD_MAX_SNAP_NAME_LEN 32 #define RBD_MAX_OPT_LEN 1024 @@ -78,13 +76,12 @@ */ struct rbd_image_header { u64 image_size; - char block_name[32]; + char *object_prefix; __u8 obj_order; __u8 crypt_type; __u8 comp_type; struct ceph_snap_context *snapc; size_t snap_names_len; - u64 snap_seq; u32 total_snaps; char *snap_names; @@ -150,7 +147,7 @@ struct rbd_snap { * a single device */ struct rbd_device { - int id; /* blkdev unique id */ + int dev_id; /* blkdev unique id */ int major; /* blkdev assigned major */ struct gendisk *disk; /* blkdev's gendisk and rq */ @@ -163,20 +160,24 @@ struct rbd_device { spinlock_t lock; /* queue lock */ struct rbd_image_header header; - char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ - int obj_len; - char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ - char pool_name[RBD_MAX_POOL_NAME_LEN]; - int poolid; + char *image_name; + size_t image_name_len; + char *header_name; + char *pool_name; + int pool_id; struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; /* protects updating the header */ struct rw_semaphore header_rwsem; - char snap_name[RBD_MAX_SNAP_NAME_LEN]; + /* name of the snapshot this device reads from */ + char *snap_name; + /* id of the snapshot this device reads from */ u64 snap_id; /* current snapshot id */ - int read_only; + /* whether the snap_id this device reads from still exists */ + bool snap_exists; + int read_only; struct list_head node; @@ -201,8 +202,7 @@ static ssize_t rbd_snap_add(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); -static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, - struct rbd_snap *snap); +static void __rbd_remove_snap_dev(struct rbd_snap *snap); static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); @@ -240,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) put_device(&rbd_dev->dev); } -static int __rbd_refresh_header(struct rbd_device *rbd_dev); +static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -273,9 +273,9 @@ static const struct block_device_operations rbd_bd_ops = { /* * Initialize an rbd client instance. - * We own *opt. + * We own *ceph_opts. */ -static struct rbd_client *rbd_client_create(struct ceph_options *opt, +static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, struct rbd_options *rbd_opts) { struct rbd_client *rbdc; @@ -291,10 +291,10 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbdc->client = ceph_create_client(opt, rbdc, 0, 0); + rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); if (IS_ERR(rbdc->client)) goto out_mutex; - opt = NULL; /* Now rbdc->client is responsible for opt */ + ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ ret = ceph_open_session(rbdc->client); if (ret < 0) @@ -317,23 +317,23 @@ out_mutex: mutex_unlock(&ctl_mutex); kfree(rbdc); out_opt: - if (opt) - ceph_destroy_options(opt); + if (ceph_opts) + ceph_destroy_options(ceph_opts); return ERR_PTR(ret); } /* * Find a ceph client with specific addr and configuration. */ -static struct rbd_client *__rbd_client_find(struct ceph_options *opt) +static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts) { struct rbd_client *client_node; - if (opt->flags & CEPH_OPT_NOSHARE) + if (ceph_opts->flags & CEPH_OPT_NOSHARE) return NULL; list_for_each_entry(client_node, &rbd_client_list, node) - if (ceph_compare_options(opt, client_node->client) == 0) + if (!ceph_compare_options(ceph_opts, client_node->client)) return client_node; return NULL; } @@ -349,7 +349,7 @@ enum { /* string args above */ }; -static match_table_t rbdopt_tokens = { +static match_table_t rbd_opts_tokens = { {Opt_notify_timeout, "notify_timeout=%d"}, /* int args above */ /* string args above */ @@ -358,11 +358,11 @@ static match_table_t rbdopt_tokens = { static int parse_rbd_opts_token(char *c, void *private) { - struct rbd_options *rbdopt = private; + struct rbd_options *rbd_opts = private; substring_t argstr[MAX_OPT_ARGS]; int token, intval, ret; - token = match_token(c, rbdopt_tokens, argstr); + token = match_token(c, rbd_opts_tokens, argstr); if (token < 0) return -EINVAL; @@ -383,7 +383,7 @@ static int parse_rbd_opts_token(char *c, void *private) switch (token) { case Opt_notify_timeout: - rbdopt->notify_timeout = intval; + rbd_opts->notify_timeout = intval; break; default: BUG_ON(token); @@ -400,7 +400,7 @@ static struct rbd_client *rbd_get_client(const char *mon_addr, char *options) { struct rbd_client *rbdc; - struct ceph_options *opt; + struct ceph_options *ceph_opts; struct rbd_options *rbd_opts; rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); @@ -409,29 +409,29 @@ static struct rbd_client *rbd_get_client(const char *mon_addr, rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; - opt = ceph_parse_options(options, mon_addr, - mon_addr + mon_addr_len, - parse_rbd_opts_token, rbd_opts); - if (IS_ERR(opt)) { + ceph_opts = ceph_parse_options(options, mon_addr, + mon_addr + mon_addr_len, + parse_rbd_opts_token, rbd_opts); + if (IS_ERR(ceph_opts)) { kfree(rbd_opts); - return ERR_CAST(opt); + return ERR_CAST(ceph_opts); } spin_lock(&rbd_client_list_lock); - rbdc = __rbd_client_find(opt); + rbdc = __rbd_client_find(ceph_opts); if (rbdc) { /* using an existing client */ kref_get(&rbdc->kref); spin_unlock(&rbd_client_list_lock); - ceph_destroy_options(opt); + ceph_destroy_options(ceph_opts); kfree(rbd_opts); return rbdc; } spin_unlock(&rbd_client_list_lock); - rbdc = rbd_client_create(opt, rbd_opts); + rbdc = rbd_client_create(ceph_opts, rbd_opts); if (IS_ERR(rbdc)) kfree(rbd_opts); @@ -480,46 +480,60 @@ static void rbd_coll_release(struct kref *kref) kfree(coll); } +static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) +{ + return !memcmp(&ondisk->text, + RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)); +} + /* * Create a new header structure, translate header format from the on-disk * header. */ static int rbd_header_from_disk(struct rbd_image_header *header, struct rbd_image_header_ondisk *ondisk, - u32 allocated_snaps, - gfp_t gfp_flags) + u32 allocated_snaps) { - u32 i, snap_count; + u32 snap_count; - if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) + if (!rbd_dev_ondisk_valid(ondisk)) return -ENXIO; snap_count = le32_to_cpu(ondisk->snap_count); - if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context)) - / sizeof (*ondisk)) + if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context)) + / sizeof (u64)) return -EINVAL; header->snapc = kmalloc(sizeof(struct ceph_snap_context) + snap_count * sizeof(u64), - gfp_flags); + GFP_KERNEL); if (!header->snapc) return -ENOMEM; - header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); if (snap_count) { + header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); header->snap_names = kmalloc(header->snap_names_len, - gfp_flags); + GFP_KERNEL); if (!header->snap_names) goto err_snapc; header->snap_sizes = kmalloc(snap_count * sizeof(u64), - gfp_flags); + GFP_KERNEL); if (!header->snap_sizes) goto err_names; } else { + WARN_ON(ondisk->snap_names_len); + header->snap_names_len = 0; header->snap_names = NULL; header->snap_sizes = NULL; } - memcpy(header->block_name, ondisk->block_name, + + header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1, + GFP_KERNEL); + if (!header->object_prefix) + goto err_sizes; + + memcpy(header->object_prefix, ondisk->block_name, sizeof(ondisk->block_name)); + header->object_prefix[sizeof (ondisk->block_name)] = '\0'; header->image_size = le64_to_cpu(ondisk->image_size); header->obj_order = ondisk->options.order; @@ -527,11 +541,13 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->comp_type = ondisk->options.comp_type; atomic_set(&header->snapc->nref, 1); - header->snap_seq = le64_to_cpu(ondisk->snap_seq); + header->snapc->seq = le64_to_cpu(ondisk->snap_seq); header->snapc->num_snaps = snap_count; header->total_snaps = snap_count; if (snap_count && allocated_snaps == snap_count) { + int i; + for (i = 0; i < snap_count; i++) { header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); @@ -540,16 +556,22 @@ static int rbd_header_from_disk(struct rbd_image_header *header, } /* copy snapshot names */ - memcpy(header->snap_names, &ondisk->snaps[i], + memcpy(header->snap_names, &ondisk->snaps[snap_count], header->snap_names_len); } return 0; +err_sizes: + kfree(header->snap_sizes); + header->snap_sizes = NULL; err_names: kfree(header->snap_names); + header->snap_names = NULL; err_snapc: kfree(header->snapc); + header->snapc = NULL; + return -ENOMEM; } @@ -575,52 +597,50 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name, return -ENOENT; } -static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) +static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) { - struct rbd_image_header *header = &dev->header; - struct ceph_snap_context *snapc = header->snapc; - int ret = -ENOENT; - - BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); + int ret; - down_write(&dev->header_rwsem); + down_write(&rbd_dev->header_rwsem); - if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, + if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { - if (header->total_snaps) - snapc->seq = header->snap_seq; - else - snapc->seq = 0; - dev->snap_id = CEPH_NOSNAP; - dev->read_only = 0; + rbd_dev->snap_id = CEPH_NOSNAP; + rbd_dev->snap_exists = false; + rbd_dev->read_only = 0; if (size) - *size = header->image_size; + *size = rbd_dev->header.image_size; } else { - ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); + u64 snap_id = 0; + + ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name, + &snap_id, size); if (ret < 0) goto done; - dev->snap_id = snapc->seq; - dev->read_only = 1; + rbd_dev->snap_id = snap_id; + rbd_dev->snap_exists = true; + rbd_dev->read_only = 1; } ret = 0; done: - up_write(&dev->header_rwsem); + up_write(&rbd_dev->header_rwsem); return ret; } static void rbd_header_free(struct rbd_image_header *header) { - kfree(header->snapc); - kfree(header->snap_names); + kfree(header->object_prefix); kfree(header->snap_sizes); + kfree(header->snap_names); + ceph_put_snap_context(header->snapc); } /* * get the actual striped segment name, offset and length */ static u64 rbd_get_segment(struct rbd_image_header *header, - const char *block_name, + const char *object_prefix, u64 ofs, u64 len, char *seg_name, u64 *segofs) { @@ -628,7 +648,7 @@ static u64 rbd_get_segment(struct rbd_image_header *header, if (seg_name) snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, - "%s.%012llx", block_name, seg); + "%s.%012llx", object_prefix, seg); ofs = ofs & ((1 << header->obj_order) - 1); len = min_t(u64, len, (1 << header->obj_order) - ofs); @@ -726,9 +746,8 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, * split_bio will BUG_ON if this is not the case */ dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%d\n", - (int)total, (int)len-total, - (int)old_chain->bi_size); + "bi_size=%u\n", + total, len - total, old_chain->bi_size); /* split the bio. We'll release it either in the next call, or it will have to be released outside */ @@ -777,22 +796,24 @@ err_out: /* * helpers for osd request op vectors. */ -static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, - int num_ops, - int opcode, - u32 payload_len) -{ - *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), - GFP_NOIO); - if (!*ops) - return -ENOMEM; - (*ops)[0].op = opcode; +static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, + int opcode, u32 payload_len) +{ + struct ceph_osd_req_op *ops; + + ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); + if (!ops) + return NULL; + + ops[0].op = opcode; + /* * op extent offset and length will be set later on * in calc_raw_layout() */ - (*ops)[0].payload_len = payload_len; - return 0; + ops[0].payload_len = payload_len; + + return ops; } static void rbd_destroy_ops(struct ceph_osd_req_op *ops) @@ -808,8 +829,8 @@ static void rbd_coll_end_req_index(struct request *rq, struct request_queue *q; int min, max, i; - dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", - coll, index, ret, len); + dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", + coll, index, ret, (unsigned long long) len); if (!rq) return; @@ -848,16 +869,15 @@ static void rbd_coll_end_req(struct rbd_request *req, * Send ceph osd request */ static int rbd_do_request(struct request *rq, - struct rbd_device *dev, + struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 snapid, - const char *obj, u64 ofs, u64 len, + const char *object_name, u64 ofs, u64 len, struct bio *bio, struct page **pages, int num_pages, int flags, struct ceph_osd_req_op *ops, - int num_reply, struct rbd_req_coll *coll, int coll_index, void (*rbd_cb)(struct ceph_osd_request *req, @@ -887,15 +907,13 @@ static int rbd_do_request(struct request *rq, req_data->coll_index = coll_index; } - dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); - - down_read(&dev->header_rwsem); + dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, + (unsigned long long) ofs, (unsigned long long) len); - osdc = &dev->rbd_client->client->osdc; + osdc = &rbd_dev->rbd_client->client->osdc; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, false, GFP_NOIO, pages, bio); if (!req) { - up_read(&dev->header_rwsem); ret = -ENOMEM; goto done_pages; } @@ -912,7 +930,7 @@ static int rbd_do_request(struct request *rq, reqhead = req->r_request->front.iov_base; reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); - strncpy(req->r_oid, obj, sizeof(req->r_oid)); + strncpy(req->r_oid, object_name, sizeof(req->r_oid)); req->r_oid_len = strlen(req->r_oid); layout = &req->r_file_layout; @@ -920,7 +938,7 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32(dev->poolid); + layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); @@ -929,7 +947,6 @@ static int rbd_do_request(struct request *rq, snapc, &mtime, req->r_oid, req->r_oid_len); - up_read(&dev->header_rwsem); if (linger_req) { ceph_osdc_set_request_linger(osdc, req); @@ -944,8 +961,9 @@ static int rbd_do_request(struct request *rq, ret = ceph_osdc_wait_request(osdc, req); if (ver) *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%lld\n", - le64_to_cpu(req->r_reassert_version.version)); + dout("reassert_ver=%llu\n", + (unsigned long long) + le64_to_cpu(req->r_reassert_version.version)); ceph_osdc_put_request(req); } return ret; @@ -979,7 +997,8 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) bytes = le64_to_cpu(op->extent.length); read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); + dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", + (unsigned long long) bytes, read_op, (int) rc); if (rc == -ENOENT && read_op) { zero_bio_chain(req_data->bio, 0); @@ -1006,14 +1025,12 @@ static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg /* * Do a synchronous ceph osd operation */ -static int rbd_req_sync_op(struct rbd_device *dev, +static int rbd_req_sync_op(struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 snapid, - int opcode, int flags, - struct ceph_osd_req_op *orig_ops, - int num_reply, - const char *obj, + struct ceph_osd_req_op *ops, + const char *object_name, u64 ofs, u64 len, char *buf, struct ceph_osd_request **linger_req, @@ -1022,45 +1039,28 @@ static int rbd_req_sync_op(struct rbd_device *dev, int ret; struct page **pages; int num_pages; - struct ceph_osd_req_op *ops = orig_ops; - u32 payload_len; + + BUG_ON(ops == NULL); num_pages = calc_pages_for(ofs , len); pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); - if (!orig_ops) { - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); - ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); - if (ret < 0) - goto done; - - if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { - ret = ceph_copy_to_page_vector(pages, buf, ofs, len); - if (ret < 0) - goto done_ops; - } - } - - ret = rbd_do_request(NULL, dev, snapc, snapid, - obj, ofs, len, NULL, + ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, + object_name, ofs, len, NULL, pages, num_pages, flags, ops, - 2, NULL, 0, NULL, linger_req, ver); if (ret < 0) - goto done_ops; + goto done; if ((flags & CEPH_OSD_FLAG_READ) && buf) ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); -done_ops: - if (!orig_ops) - rbd_destroy_ops(ops); done: ceph_release_page_vector(pages, num_pages); return ret; @@ -1070,10 +1070,10 @@ done: * Do an asynchronous ceph osd operation */ static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev , + struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 snapid, - int opcode, int flags, int num_reply, + int opcode, int flags, u64 ofs, u64 len, struct bio *bio, struct rbd_req_coll *coll, @@ -1091,14 +1091,15 @@ static int rbd_do_op(struct request *rq, return -ENOMEM; seg_len = rbd_get_segment(&rbd_dev->header, - rbd_dev->header.block_name, + rbd_dev->header.object_prefix, ofs, len, seg_name, &seg_ofs); payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); - ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); - if (ret < 0) + ret = -ENOMEM; + ops = rbd_create_rw_ops(1, opcode, payload_len); + if (!ops) goto done; /* we've taken care of segment sizes earlier when we @@ -1112,7 +1113,6 @@ static int rbd_do_op(struct request *rq, NULL, 0, flags, ops, - num_reply, coll, coll_index, rbd_req_cb, 0, NULL); @@ -1136,7 +1136,6 @@ static int rbd_req_write(struct request *rq, return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - 2, ofs, len, bio, coll, coll_index); } @@ -1155,55 +1154,58 @@ static int rbd_req_read(struct request *rq, snapid, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - 2, ofs, len, bio, coll, coll_index); } /* * Request sync osd read */ -static int rbd_req_sync_read(struct rbd_device *dev, - struct ceph_snap_context *snapc, +static int rbd_req_sync_read(struct rbd_device *rbd_dev, u64 snapid, - const char *obj, + const char *object_name, u64 ofs, u64 len, char *buf, u64 *ver) { - return rbd_req_sync_op(dev, NULL, + struct ceph_osd_req_op *ops; + int ret; + + ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); + if (!ops) + return -ENOMEM; + + ret = rbd_req_sync_op(rbd_dev, NULL, snapid, - CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, - 1, obj, ofs, len, buf, NULL, ver); + ops, object_name, ofs, len, buf, NULL, ver); + rbd_destroy_ops(ops); + + return ret; } /* * Request sync osd watch */ -static int rbd_req_sync_notify_ack(struct rbd_device *dev, +static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, u64 ver, - u64 notify_id, - const char *obj) + u64 notify_id) { struct ceph_osd_req_op *ops; - struct page **pages = NULL; int ret; - ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (ret < 0) - return ret; + ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); + if (!ops) + return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(dev->header.obj_version); + ops[0].watch.ver = cpu_to_le64(ver); ops[0].watch.cookie = notify_id; ops[0].watch.flag = 0; - ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP, - obj, 0, 0, NULL, - pages, 0, + ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, + rbd_dev->header_name, 0, 0, NULL, + NULL, 0, CEPH_OSD_FLAG_READ, ops, - 1, NULL, 0, rbd_simple_req_cb, 0, NULL); @@ -1213,54 +1215,53 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) { - struct rbd_device *dev = (struct rbd_device *)data; + struct rbd_device *rbd_dev = (struct rbd_device *)data; + u64 hver; int rc; - if (!dev) + if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, - notify_id, (int)opcode); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_refresh_header(dev); - mutex_unlock(&ctl_mutex); + dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + rbd_dev->header_name, (unsigned long long) notify_id, + (unsigned int) opcode); + rc = rbd_refresh_header(rbd_dev, &hver); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", dev->major, rc); + " update snaps: %d\n", rbd_dev->major, rc); - rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); + rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); } /* * Request sync osd watch */ -static int rbd_req_sync_watch(struct rbd_device *dev, - const char *obj, - u64 ver) +static int rbd_req_sync_watch(struct rbd_device *rbd_dev) { struct ceph_osd_req_op *ops; - struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + int ret; - int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); - if (ret < 0) - return ret; + ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); + if (!ops) + return -ENOMEM; ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)dev, &dev->watch_event); + (void *)rbd_dev, &rbd_dev->watch_event); if (ret < 0) goto fail; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); + ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); + ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); ops[0].watch.flag = 1; - ret = rbd_req_sync_op(dev, NULL, + ret = rbd_req_sync_op(rbd_dev, NULL, CEPH_NOSNAP, - 0, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, ops, - 1, obj, 0, 0, NULL, - &dev->watch_request, NULL); + rbd_dev->header_name, + 0, 0, NULL, + &rbd_dev->watch_request, NULL); if (ret < 0) goto fail_event; @@ -1269,8 +1270,8 @@ static int rbd_req_sync_watch(struct rbd_dev |