diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-11 09:18:32 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-11 09:18:32 -0700 |
commit | 682c30ed2165d5694a414d31eac7c63ac5700fb0 (patch) | |
tree | 0dd5c95637222f05f3f89511453387b03c85f6f4 /fs | |
parent | 84479f3c17e2c452d22be10a967e5282b3742d9f (diff) | |
parent | e56fa10e92e077d456cbc33b7025032887772b33 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (39 commits)
ceph: generalize mon requests, add pool op support
ceph: only queue async writeback on cap revocation if there is dirty data
ceph: do not ignore osd_idle_ttl mount option
ceph: constify dentry_operations
ceph: whitespace cleanup
ceph: add flock/fcntl lock support
ceph: define on-wire types, constants for file locking support
ceph: add CEPH_FEATURE_FLOCK to the supported feature bits
ceph: support v2 reconnect encoding
ceph: support v2 client_caps encoding
ceph: move AES iv definition to shared header
ceph: fix decoding of pool snap info
ceph: make ->sync_fs not wait if wait==0
ceph: warn on missing snap realm
ceph: print useful error message when crush rule not found
ceph: use %pU to print uuid (fsid)
ceph: sync header defs with server code
ceph: clean up header guards
ceph: strip misleading/obsolete version, feature info
ceph: specify supported features in super.h
...
Diffstat (limited to 'fs')
39 files changed, 1162 insertions, 410 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 6a660e610be..278e1172600 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),) obj-$(CONFIG_CEPH_FS) += ceph.o -ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ +ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ export.o caps.o snap.o xattr.o \ messenger.o msgpool.o buffer.o pagelist.o \ mds_client.o mdsmap.o \ diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d9c60b84949..5598a0d0229 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, zero_user_segment(page, s, PAGE_CACHE_SIZE); } - if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page->index, + GFP_NOFS)) { page_cache_release(page); dout("readpages %p add_to_page_cache failed %p\n", inode, page); @@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req, * page truncation thread, possibly losing some data that * raced its way in */ - if ((issued & CEPH_CAP_FILE_CACHE) == 0) + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) generic_error_remove_page(inode->i_mapping, page); unlock_page(page); @@ -797,9 +798,12 @@ get_more_pages: dout("%p will write page %p idx %lu\n", inode, page, page->index); - writeback_stat = atomic_long_inc_return(&client->writeback_count); - if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) { - set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); + writeback_stat = + atomic_long_inc_return(&client->writeback_count); + if (writeback_stat > CONGESTION_ON_THRESH( + client->mount_args->congestion_kb)) { + set_bdi_congested(&client->backing_dev_info, + BLK_RW_ASYNC); } set_page_writeback(page); @@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, *pagep = page; dout("write_begin file %p inode %p page %p %d~%d\n", file, - inode, page, (int)pos, (int)len); + inode, page, (int)pos, (int)len); r = ceph_update_writeable_page(file, pos, len, page); } while (r == -EAGAIN); diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c index 67b2c030924..eb2a666b0be 100644 --- a/fs/ceph/armor.c +++ b/fs/ceph/armor.c @@ -1,11 +1,15 @@ #include <linux/errno.h> +int ceph_armor(char *dst, const char *src, const char *end); +int ceph_unarmor(char *dst, const char *src, const char *end); + /* * base64 encode/decode. */ -const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char *pem_key = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static int encode_bits(int c) { diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 89490beaf53..6d2e3060062 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -20,7 +20,7 @@ static u32 supported_protocols[] = { CEPH_AUTH_CEPHX }; -int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) +static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) { switch (protocol) { case CEPH_AUTH_NONE: @@ -133,8 +133,8 @@ bad: return -ERANGE; } -int ceph_build_auth_request(struct ceph_auth_client *ac, - void *msg_buf, size_t msg_len) +static int ceph_build_auth_request(struct ceph_auth_client *ac, + void *msg_buf, size_t msg_len) { struct ceph_mon_request_header *monhdr = msg_buf; void *p = monhdr + 1; diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 6d44053ecff..582e0b2caf8 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, /* * get existing (or insert new) ticket handler */ -struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, - int service) +static struct ceph_x_ticket_handler * +get_ticket_handler(struct ceph_auth_client *ac, int service) { struct ceph_x_ticket_handler *th; struct ceph_x_info *xi = ac->private; @@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, auth->struct_v = 1; auth->key = 0; for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) - auth->key ^= *u; + auth->key ^= *(__le64 *)u; dout(" server_challenge %llx client_challenge %llx key %llx\n", xi->server_challenge, le64_to_cpu(auth->client_challenge), le64_to_cpu(auth->key)); diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index c67535d70aa..cd39f17021d 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c @@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref) kfree(b); } -int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp) -{ - b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); - if (b->vec.iov_base) { - b->is_vmalloc = false; - } else { - b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); - b->is_vmalloc = true; - } - if (!b->vec.iov_base) - return -ENOMEM; - b->alloc_len = len; - b->vec.iov_len = len; - return 0; -} - int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) { size_t len; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b81be9a5648..7bf182b0397 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps) return cap_str[i]; } -/* - * Cap reservations - * - * Maintain a global pool of preallocated struct ceph_caps, referenced - * by struct ceph_caps_reservations. This ensures that we preallocate - * memory needed to successfully process an MDS response. (If an MDS - * sends us cap information and we fail to process it, we will have - * problems due to the client and MDS being out of sync.) - * - * Reservations are 'owned' by a ceph_cap_reservation context. - */ -static spinlock_t caps_list_lock; -static struct list_head caps_list; /* unused (reserved or unreserved) */ -static int caps_total_count; /* total caps allocated */ -static int caps_use_count; /* in use */ -static int caps_reserve_count; /* unused, reserved */ -static int caps_avail_count; /* unused, unreserved */ -static int caps_min_count; /* keep at least this many (unreserved) */ - -void __init ceph_caps_init(void) +void ceph_caps_init(struct ceph_mds_client *mdsc) { - INIT_LIST_HEAD(&caps_list); - spin_lock_init(&caps_list_lock); + INIT_LIST_HEAD(&mdsc->caps_list); + spin_lock_init(&mdsc->caps_list_lock); } -void ceph_caps_finalize(void) +void ceph_caps_finalize(struct ceph_mds_client *mdsc) { struct ceph_cap *cap; - spin_lock(&caps_list_lock); - while (!list_empty(&caps_list)) { - cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); + spin_lock(&mdsc->caps_list_lock); + while (!list_empty(&mdsc->caps_list)) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); list_del(&cap->caps_item); kmem_cache_free(ceph_cap_cachep, cap); } - caps_total_count = 0; - caps_avail_count = 0; - caps_use_count = 0; - caps_reserve_count = 0; - caps_min_count = 0; - spin_unlock(&caps_list_lock); + mdsc->caps_total_count = 0; + mdsc->caps_avail_count = 0; + mdsc->caps_use_count = 0; + mdsc->caps_reserve_count = 0; + mdsc->caps_min_count = 0; + spin_unlock(&mdsc->caps_list_lock); } -void ceph_adjust_min_caps(int delta) +void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) { - spin_lock(&caps_list_lock); - caps_min_count += delta; - BUG_ON(caps_min_count < 0); - spin_unlock(&caps_list_lock); + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_min_count += delta; + BUG_ON(mdsc->caps_min_count < 0); + spin_unlock(&mdsc->caps_list_lock); } -int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) +int ceph_reserve_caps(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx, int need) { int i; struct ceph_cap *cap; @@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) dout("reserve caps ctx=%p need=%d\n", ctx, need); /* first reserve any caps that are already allocated */ - spin_lock(&caps_list_lock); - if (caps_avail_count >= need) + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count >= need) have = need; else - have = caps_avail_count; - caps_avail_count -= have; - caps_reserve_count += have; - BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + - caps_avail_count); - spin_unlock(&caps_list_lock); + have = mdsc->caps_avail_count; + mdsc->caps_avail_count -= have; + mdsc->caps_reserve_count += have; + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); for (i = have; i < need; i++) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); @@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) } BUG_ON(have + alloc != need); - spin_lock(&caps_list_lock); - caps_total_count += alloc; - caps_reserve_count += alloc; - list_splice(&newcaps, &caps_list); + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_total_count += alloc; + mdsc->caps_reserve_count += alloc; + list_splice(&newcaps, &mdsc->caps_list); - BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + - caps_avail_count); - spin_unlock(&caps_list_lock); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); ctx->count = need; dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", - ctx, caps_total_count, caps_use_count, caps_reserve_count, - caps_avail_count); + ctx, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); return 0; out_alloc_count: @@ -220,26 +205,29 @@ out_alloc_count: return ret; } -int ceph_unreserve_caps(struct ceph_cap_reservation *ctx) +int ceph_unreserve_caps(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); if (ctx->count) { - spin_lock(&caps_list_lock); - BUG_ON(caps_reserve_count < ctx->count); - caps_reserve_count -= ctx->count; - caps_avail_count += ctx->count; + spin_lock(&mdsc->caps_list_lock); + BUG_ON(mdsc->caps_reserve_count < ctx->count); + mdsc->caps_reserve_count -= ctx->count; + mdsc->caps_avail_count += ctx->count; ctx->count = 0; dout("unreserve caps %d = %d used + %d resv + %d avail\n", - caps_total_count, caps_use_count, caps_reserve_count, - caps_avail_count); - BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + - caps_avail_count); - spin_unlock(&caps_list_lock); + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); } return 0; } -static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) +static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { struct ceph_cap *cap = NULL; @@ -247,71 +235,74 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) if (!ctx) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); if (cap) { - caps_use_count++; - caps_total_count++; + mdsc->caps_use_count++; + mdsc->caps_total_count++; } return cap; } - spin_lock(&caps_list_lock); + spin_lock(&mdsc->caps_list_lock); dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", - ctx, ctx->count, caps_total_count, caps_use_count, - caps_reserve_count, caps_avail_count); + ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(!ctx->count); - BUG_ON(ctx->count > caps_reserve_count); - BUG_ON(list_empty(&caps_list)); + BUG_ON(ctx->count > mdsc->caps_reserve_count); + BUG_ON(list_empty(&mdsc->caps_list)); ctx->count--; - caps_reserve_count--; - caps_use_count++; + mdsc->caps_reserve_count--; + mdsc->caps_use_count++; - cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); + cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); list_del(&cap->caps_item); - BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + - caps_avail_count); - spin_unlock(&caps_list_lock); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); return cap; } -void ceph_put_cap(struct ceph_cap *cap) +void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) { - spin_lock(&caps_list_lock); + spin_lock(&mdsc->caps_list_lock); dout("put_cap %p %d = %d used + %d resv + %d avail\n", - cap, caps_total_count, caps_use_count, - caps_reserve_count, caps_avail_count); - caps_use_count--; + cap, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); + mdsc->caps_use_count--; /* * Keep some preallocated caps around (ceph_min_count), to * avoid lots of free/alloc churn. */ - if (caps_avail_count >= caps_reserve_count + caps_min_count) { - caps_total_count--; + if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + + mdsc->caps_min_count) { + mdsc->caps_total_count--; kmem_cache_free(ceph_cap_cachep, cap); } else { - caps_avail_count++; - list_add(&cap->caps_item, &caps_list); + mdsc->caps_avail_count++; + list_add(&cap->caps_item, &mdsc->caps_list); } - BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + - caps_avail_count); - spin_unlock(&caps_list_lock); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); } void ceph_reservation_status(struct ceph_client *client, int *total, int *avail, int *used, int *reserved, int *min) { + struct ceph_mds_client *mdsc = &client->mdsc; + if (total) - *total = caps_total_count; + *total = mdsc->caps_total_count; if (avail) - *avail = caps_avail_count; + *avail = mdsc->caps_avail_count; if (used) - *used = caps_use_count; + *used = mdsc->caps_use_count; if (reserved) - *reserved = caps_reserve_count; + *reserved = mdsc->caps_reserve_count; if (min) - *min = caps_min_count; + *min = mdsc->caps_min_count; } /* @@ -336,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) return NULL; } +struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) +{ + struct ceph_cap *cap; + + spin_lock(&ci->vfs_inode.i_lock); + cap = __get_cap_for_mds(ci, mds); + spin_unlock(&ci->vfs_inode.i_lock); + return cap; +} + /* - * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else - * -1. + * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. */ -static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq) +static int __ceph_get_cap_mds(struct ceph_inode_info *ci) { struct ceph_cap *cap; int mds = -1; struct rb_node *p; - /* prefer mds with WR|WRBUFFER|EXCL caps */ + /* prefer mds with WR|BUFFER|EXCL caps */ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); mds = cap->mds; - if (mseq) - *mseq = cap->mseq; if (cap->issued & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_EXCL)) @@ -364,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode) { int mds; spin_lock(&inode->i_lock); - mds = __ceph_get_cap_mds(ceph_inode(inode), NULL); + mds = __ceph_get_cap_mds(ceph_inode(inode)); spin_unlock(&inode->i_lock); return mds; } @@ -483,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, * Each time we receive FILE_CACHE anew, we increment * i_rdcache_gen. */ - if ((issued & CEPH_CAP_FILE_CACHE) && - (had & CEPH_CAP_FILE_CACHE) == 0) + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) ci->i_rdcache_gen++; /* @@ -543,7 +541,7 @@ retry: new_cap = NULL; } else { spin_unlock(&inode->i_lock); - new_cap = get_cap(caps_reservation); + new_cap = get_cap(mdsc, caps_reservation); if (new_cap == NULL) return -ENOMEM; goto retry; @@ -588,6 +586,7 @@ retry: } else { pr_err("ceph_add_cap: couldn't find snap realm %llx\n", realmino); + WARN_ON(!realm); } } @@ -831,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) { int want = 0; int mode; - for (mode = 0; mode < 4; mode++) + for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++) if (ci->i_nr_by_mode[mode]) want |= ceph_caps_for_mode(mode); return want; @@ -901,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) ci->i_auth_cap = NULL; if (removed) - ceph_put_cap(cap); + ceph_put_cap(mdsc, cap); if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { struct ceph_snap_realm *realm = ci->i_snap_realm; @@ -1197,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, */ void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession) + __releases(ci->vfs_inode->i_lock) + __acquires(ci->vfs_inode->i_lock) { struct inode *inode = &ci->vfs_inode; int mds; @@ -1232,7 +1233,13 @@ retry: BUG_ON(capsnap->dirty == 0); /* pick mds, take s_mutex */ - mds = __ceph_get_cap_mds(ci, &mseq); + if (ci->i_auth_cap == NULL) { + dout("no auth cap (migrating?), doing nothing\n"); + goto out; + } + mds = ci->i_auth_cap->session->s_mds; + mseq = ci->i_auth_cap->mseq; + if (session && session->s_mds != mds) { dout("oops, wrong session %p mutex\n", session); mutex_unlock(&session->s_mutex); @@ -1251,8 +1258,8 @@ retry: } /* * if session == NULL, we raced against a cap - * deletion. retry, and we'll get a better - * @mds value next time. + * deletion or migration. retry, and we'll + * get a better @mds value next time. */ spin_lock(&inode->i_lock); goto retry; @@ -1290,6 +1297,7 @@ retry: list_del_init(&ci->i_snap_flush_item); spin_unlock(&mdsc->snap_flush_lock); +out: if (psession) *psession = session; else if (session) { @@ -1435,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode) */ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) - __releases(session->s_mutex) { struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); struct ceph_mds_client *mdsc = &client->mdsc; @@ -1510,11 +1517,13 @@ retry_locked: ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ ci->i_rdcache_gen && /* may have cached pages */ (file_wanted == 0 || /* no open files */ - (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */ + (revoking & (CEPH_CAP_FILE_CACHE| + CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { - if (revoking & CEPH_CAP_FILE_CACHE) { + if (revoking & (CEPH_CAP_FILE_CACHE| + CEPH_CAP_FILE_LAZYIO)) { dout("check_caps queuing invalidate\n"); queue_invalidate = 1; ci->i_rdcache_revoking = ci->i_rdcache_gen; @@ -2250,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, struct ceph_mds_session *session, struct ceph_cap *cap, struct ceph_buffer *xattr_buf) - __releases(inode->i_lock) - __releases(session->s_mutex) + __releases(inode->i_lock) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; @@ -2278,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, * will invalidate _after_ writeback.) */ if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && !ci->i_wrbuffer_ref) { if (try_nonblocking_invalidate(inode) == 0) { revoked_rdcache = 1; @@ -2369,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, /* revocation, grant, or no-op? */ if (cap->issued & ~newcaps) { - dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued), - ceph_cap_string(newcaps)); - if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) - writeback = 1; /* will delay ack */ - else if (dirty & ~newcaps) - check_caps = 1; /* initiate writeback in check_caps */ - else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || - revoked_rdcache) - check_caps = 2; /* send revoke ack in check_caps */ + int revoking = cap->issued & ~newcaps; + + dout("revocation: %s -> %s (revoking %s)\n", + ceph_cap_string(cap->issued), + ceph_cap_string(newcaps), + ceph_cap_string(revoking)); + if (revoking & used & CEPH_CAP_FILE_BUFFER) + writeback = 1; /* initiate writeback; will delay ack */ + else if (revoking == CEPH_CAP_FILE_CACHE && + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && + queue_invalidate) + ; /* do nothing yet, invalidation will be queued */ + else if (cap == ci->i_auth_cap) + check_caps = 1; /* check auth cap only */ + else + check_caps = 2; /* check all caps */ cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { @@ -2568,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode, * caller holds s_mutex */ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, - struct ceph_mds_session *session) + struct ceph_mds_session *session, + int *open_target_sessions) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; @@ -2600,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ci->i_cap_exporting_mds = mds; ci->i_cap_exporting_mseq = mseq; ci->i_cap_exporting_issued = cap->issued; + + /* + * make sure we have open sessions with all possible + * export targets, so that we get the matching IMPORT + */ + *open_target_sessions = 1; } __ceph_remove_cap(cap); } @@ -2675,6 +2698,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 size, max_size; u64 tid; void *snaptrace; + size_t snaptrace_len; + void *flock; + u32 flock_len; + int open_target_sessions = 0; dout("handle_caps from mds%d\n", mds); @@ -2683,7 +2710,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (msg->front.iov_len < sizeof(*h)) goto bad; h = msg->front.iov_base; - snaptrace = h + 1; op = le32_to_cpu(h->op); vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; @@ -2693,6 +2719,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); + snaptrace = h + 1; + snaptrace_len = le32_to_cpu(h->snap_trace_len); + + if (le16_to_cpu(msg->hdr.version) >= 2) { + void *p, *end; + + p = snaptrace + snaptrace_len; + end = msg->front.iov_base + msg->front.iov_len; + ceph_decode_32_safe(&p, end, flock_len, bad); + flock = p; + } else { + flock = NULL; + flock_len = 0; + } + mutex_lock(&session->s_mutex); session->s_seq++; dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, @@ -2714,7 +2755,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, * along for the mds (who clearly thinks we still have this * cap). */ - ceph_add_cap_releases(mdsc, session, -1); + ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); goto done; } @@ -2726,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, goto done; case CEPH_CAP_OP_EXPORT: - handle_cap_export(inode, h, session); + handle_cap_export(inode, h, session, &open_target_sessions); goto done; case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, - snaptrace, le32_to_cpu(h->snap_trace_ |