diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 09:13:04 +0200 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 09:13:04 +0200 |
commit | fa251f89903d73989e2f63e13d0eaed1e07ce0da (patch) | |
tree | 3f7fe779941e3b6d67754dd7c44a32f48ea47c74 /fs/ceph | |
parent | dd3932eddf428571762596e17b65f5dc92ca361b (diff) | |
parent | cd07202cc8262e1669edff0d97715f3dd9260917 (diff) |
Merge branch 'v2.6.36-rc8' into for-2.6.37/barrier
Conflicts:
block/blk-core.c
drivers/block/loop.c
mm/swapfile.c
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/Kconfig | 1 | ||||
-rw-r--r-- | fs/ceph/addr.c | 19 | ||||
-rw-r--r-- | fs/ceph/auth_x.c | 15 | ||||
-rw-r--r-- | fs/ceph/caps.c | 90 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 4 | ||||
-rw-r--r-- | fs/ceph/dir.c | 12 | ||||
-rw-r--r-- | fs/ceph/export.c | 21 | ||||
-rw-r--r-- | fs/ceph/file.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 16 | ||||
-rw-r--r-- | fs/ceph/locks.c | 14 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 103 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 4 | ||||
-rw-r--r-- | fs/ceph/pagelist.c | 12 | ||||
-rw-r--r-- | fs/ceph/snap.c | 177 | ||||
-rw-r--r-- | fs/ceph/super.h | 16 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 1 |
17 files changed, 299 insertions, 211 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27..0fcd2640c23 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,6 +3,7 @@ config CEPH_FS depends on INET && EXPERIMENTAL select LIBCRC32C select CRYPTO_AES + select CRYPTO help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5598a0d0229..efbc604001c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page) /* dirty the head */ spin_lock(&inode->i_lock); - if (ci->i_wrbuffer_ref_head == 0) + if (ci->i_head_snapc == NULL) ci->i_head_snapc = ceph_get_snap_context(snapc); ++ci->i_wrbuffer_ref_head; if (ci->i_wrbuffer_ref == 0) @@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page) spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(!PageUptodate(page)); - - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, page->mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); @@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, break; } } - if (!snapc && ci->i_head_snapc) { + if (!snapc && ci->i_wrbuffer_ref_head) { snapc = ceph_get_snap_context(ci->i_head_snapc); dout(" head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); @@ -417,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (i_size < page_off + len) len = i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u\n", - inode, page, page->index, page_off, len); + dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", + inode, page, page->index, page_off, len, snapc); writeback_stat = atomic_long_inc_return(&client->writeback_count); if (writeback_stat > @@ -772,7 +766,8 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = page->index << PAGE_CACHE_SHIFT; + offset = (unsigned long long)page->index + << PAGE_CACHE_SHIFT; len = wsize; req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 582e0b2caf8..a2d002cbdec 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) th = get_ticket_handler(ac, service); - if (!th) { + if (IS_ERR(th)) { *pneed |= service; continue; } @@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); + if (IS_ERR(th)) + return PTR_ERR(th); + ceph_x_validate_tickets(ac, &need); dout("build_request want %x have %x need %x\n", @@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, return -ERANGE; head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); - BUG_ON(!th); ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); if (ret) return ret; @@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, case CEPHX_GET_PRINCIPAL_SESSION_KEY: th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); - BUG_ON(!th); + if (IS_ERR(th)) + return PTR_ERR(th); ret = ceph_x_proc_ticket_reply(ac, &th->session_key, buf + sizeof(*head), end); break; @@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, void *end = p + sizeof(au->reply_buf); th = get_ticket_handler(ac, au->service); - if (!th) - return -EIO; /* hrm! */ + if (IS_ERR(th)) + return PTR_ERR(th); ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); if (ret < 0) return ret; @@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th; th = get_ticket_handler(ac, peer_type); - if (th && !IS_ERR(th)) + if (!IS_ERR(th)) remove_ticket_handler(ac, th); } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b0397..5e9da996a15 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->i_rdcache_gen) + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, gid_t gid; struct ceph_mds_session *session; u64 xattr_version = 0; + struct ceph_buffer *xattr_blob = NULL; int delayed = 0; u64 flush_tid = 0; int i; @@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, for (i = 0; i < CEPH_CAP_BITS; i++) if (flushing & (1 << i)) ci->i_cap_flush_tid[i] = flush_tid; + + follows = ci->i_head_snapc->seq; + } else { + follows = 0; } keep = cap->implemented; @@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, mtime = inode->i_mtime; atime = inode->i_atime; time_warp_seq = ci->i_time_warp_seq; - follows = ci->i_snap_realm->cached_context->seq; uid = inode->i_uid; gid = inode->i_gid; mode = inode->i_mode; - if (dropping & CEPH_CAP_XATTR_EXCL) { + if (flushing & CEPH_CAP_XATTR_EXCL) { __ceph_build_xattrs_blob(ci); - xattr_version = ci->i_xattrs.version + 1; + xattr_blob = ci->i_xattrs.blob; + xattr_version = ci->i_xattrs.version; } spin_unlock(&inode->i_lock); @@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, - uid, gid, mode, - xattr_version, - (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, + uid, gid, mode, xattr_version, xattr_blob, follows); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); @@ -1192,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * + * Unless @again is true, skip cap_snaps that were already sent to + * the MDS (i.e., during this session). + * * Called under i_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession) + struct ceph_mds_session **psession, + int again) __releases(ci->vfs_inode->i_lock) __acquires(ci->vfs_inode->i_lock) { @@ -1224,7 +1231,7 @@ retry: * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) - continue; + break; /* * if cap writeback already occurred, we should have dropped @@ -1237,6 +1244,13 @@ retry: dout("no auth cap (migrating?), doing nothing\n"); goto out; } + + /* only flush each capsnap once */ + if (!again && !list_empty(&capsnap->flushing_item)) { + dout("already flushed %p, skipping\n", capsnap); + continue; + } + mds = ci->i_auth_cap->session->s_mds; mseq = ci->i_auth_cap->mseq; @@ -1273,8 +1287,8 @@ retry: &session->s_cap_snaps_flushing); spin_unlock(&inode->i_lock); - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", - inode, capsnap, next_follows, capsnap->size); + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", + inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, @@ -1282,7 +1296,7 @@ retry: &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, - 0, NULL, + capsnap->xattr_version, capsnap->xattr_blob, capsnap->follows); next_follows = capsnap->follows + 1; @@ -1311,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, NULL); + __ceph_flush_snaps(ci, NULL, 0); spin_unlock(&inode->i_lock); } @@ -1332,7 +1346,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { - dout(" inode %p now dirty\n", &ci->vfs_inode); + if (!ci->i_head_snapc) + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, + ci->i_head_snapc); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &mdsc->cap_dirty); @@ -1470,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* flush snaps first time around only */ if (!list_empty(&ci->i_cap_snaps)) - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); goto retry_locked; retry: spin_lock(&inode->i_lock); @@ -1887,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, if (cap && cap->session == session) { dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, cap, capsnap); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 1); } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); @@ -2190,7 +2208,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; - if (!ci->i_wrbuffer_ref_head) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { + BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } @@ -2263,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; - int seq = le32_to_cpu(grant->seq); + unsigned seq = le32_to_cpu(grant->seq); + unsigned issue_seq = le32_to_cpu(grant->issue_seq); int newcaps = le32_to_cpu(grant->caps); int issued, implemented, used, wanted, dirty; u64 size = le64_to_cpu(grant->size); @@ -2275,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, int revoked_rdcache = 0; int queue_invalidate = 0; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, mds, seq, ceph_cap_string(newcaps)); + dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", + inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, inode->i_size); @@ -2372,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } cap->seq = seq; + cap->issue_seq = issue_seq; /* file layout may have changed */ ci->i_layout = grant->layout; @@ -2483,6 +2505,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, dout(" inode %p now clean\n", inode); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = 1; + if (ci->i_wrbuffer_ref_head == 0) { + BUG_ON(!ci->i_head_snapc); + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } else { BUG_ON(list_empty(&ci->i_dirty_item)); } @@ -2749,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (op == CEPH_CAP_OP_IMPORT) __queue_cap_release(session, vino.ino, cap_id, mseq, seq); - - /* - * send any full release message to try to move things - * along for the mds (who clearly thinks we still have this - * cap). - */ - ceph_add_cap_releases(mdsc, session); - ceph_send_cap_releases(mdsc, session); - goto done; + goto flush_cap_releases; } /* these will work even if we don't have a cap yet */ @@ -2785,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); - goto done; + goto flush_cap_releases; } /* note that each of these drops i_lock for us */ @@ -2809,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, ceph_cap_op_name(op)); } + goto done; + +flush_cap_releases: + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_add_cap_releases(mdsc, session); + ceph_send_cap_releases(mdsc, session); + done: mutex_unlock(&session->s_mutex); done_unlocked: diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 360c4f22718..6fd8b20a861 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p) } else if (req->r_dentry) { path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) + path = NULL; spin_lock(&req->r_dentry->d_lock); seq_printf(s, " #%llx/%.*s (%s)", ceph_ino(req->r_dentry->d_parent->d_inode), @@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_old_dentry) { path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) + path = NULL; spin_lock(&req->r_old_dentry->d_lock); seq_printf(s, " #%llx/%.*s (%s)", ceph_ino(req->r_old_dentry->d_parent->d_inode), diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 67bbb41d552..a1986eb5204 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry) else dentry->d_op = &ceph_snap_dentry_ops; - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); + di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -1021,11 +1021,15 @@ out_touch: static void ceph_dentry_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct inode *parent_inode = dentry->d_parent->d_inode; - u64 snapid = ceph_snap(parent_inode); + struct inode *parent_inode = NULL; + u64 snapid = CEPH_NOSNAP; + if (!IS_ROOT(dentry)) { + parent_inode = dentry->d_parent->d_inode; + if (parent_inode) + snapid = ceph_snap(parent_inode); + } dout("dentry_release %p parent %p\n", dentry, parent_inode); - if (parent_inode && snapid != CEPH_SNAPDIR) { struct ceph_inode_info *ci = ceph_inode(parent_inode); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4480cb1c63e..e38423e82f2 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -42,32 +42,37 @@ struct ceph_nfs_confh { static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, int connectable) { + int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; struct dentry *parent = dentry->d_parent; struct inode *inode = dentry->d_inode; - int type; + int connected_handle_length = sizeof(*cfh)/4; + int handle_length = sizeof(*fh)/4; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - if (*max_len >= sizeof(*cfh)) { + if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_name_hash = parent->d_name.hash; - *max_len = sizeof(*cfh); + *max_len = connected_handle_length; type = 2; - } else if (*max_len > sizeof(*fh)) { - if (connectable) - return -ENOSPC; + } else if (*max_len >= handle_length) { + if (connectable) { + *max_len = connected_handle_length; + return 255; + } dout("encode_fh %p\n", dentry); fh->ino = ceph_ino(dentry->d_inode); - *max_len = sizeof(*fh); + *max_len = handle_length; type = 1; } else { - return -ENOSPC; + *max_len = handle_length; + return 255; } return type; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c044a4f045..66e4da6dba2 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -697,7 +697,7 @@ more: * start_request so that a tid has been assigned. */ spin_lock(&ci->i_unsafe_lock); - list_add(&ci->i_unsafe_writes, &req->r_unsafe_item); + list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); spin_unlock(&ci->i_unsafe_lock); ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5d893d31e39..62377ec37ed 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode, if (ci->i_files == 0 && ci->i_subdirs == 0 && ceph_snap(inode) == CEPH_NOSNAP && (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && + (issued & CEPH_CAP_FILE_EXCL) == 0 && (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { dout(" marking %p complete (empty)\n", inode); ci->i_ceph_flags |= CEPH_I_COMPLETE; @@ -844,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash) + bool *prehash, bool set_offset) { struct dentry *realdn; @@ -876,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - ceph_set_dentry_offset(dn); + if (set_offset) + ceph_set_dentry_offset(dn); out: return dn; } @@ -1061,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, d_delete(dn); goto done; } - dn = splice_dentry(dn, in, &have_lease); + dn = splice_dentry(dn, in, &have_lease, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1104,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, goto done; } dout(" linking snapped dir %p to dn %p\n", in, dn); - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1229,14 +1231,14 @@ retry_lookup: in = dn->d_inode; } else { in = ceph_get_inode(parent->d_sb, vino); - if (in == NULL) { + if (IS_ERR(in)) { dout("new_inode badness\n"); d_delete(dn); dput(dn); - err = -ENOMEM; + err = PTR_ERR(in); goto out; } - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, false); if (IS_ERR(dn)) dn = NULL; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae85af06454..ff4e753aae9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) length = fl->fl_end - fl->fl_start + 1; err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - (u64)fl->fl_pid, (u64)fl->fl_nspid, + (u64)fl->fl_pid, + (u64)(unsigned long)fl->fl_nspid, lock_cmd, fl->fl_start, length, wait); if (!err) { @@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) /* undo! This should only happen if the kernel detects * local deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - (u64)fl->fl_pid, (u64)fl->fl_nspid, + (u64)fl->fl_pid, + (u64)(unsigned long)fl->fl_nspid, CEPH_LOCK_UNLOCK, fl->fl_start, length, 0); dout("got %d on posix_lock_file, undid lock", err); @@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) length = fl->fl_end - fl->fl_start + 1; err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, (u64)fl->fl_pid, (u64)fl->fl_nspid, + file, (u64)fl->fl_pid, + (u64)(unsigned long)fl->fl_nspid, lock_cmd, fl->fl_start, length, wait); if (!err) { @@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, file, (u64)fl->fl_pid, - (u64)fl->fl_nspid, + (u64)(unsigned long)fl->fl_nspid, CEPH_LOCK_UNLOCK, fl->fl_start, length, 0); dout("got %d on flock_lock_file_wait, undid lock", err); @@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock, cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); cephlock->pid = cpu_to_le64(lock->fl_pid); - cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); + cephlock->pid_namespace = + cpu_to_le64((u64)(unsigned long)lock->fl_nspid); switch (lock->fl_type) { case F_RDLCK: diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a75ddbf9fe3..fad95f8f260 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc, * * Called under mdsc->mutex. */ +struct dentry *get_nonsnap_parent(struct dentry *dentry) +{ + while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) + dentry = dentry->d_parent; + return dentry; +} + static int __choose_mds(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { @@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_inode) { inode = req->r_inode; } else if (req->r_dentry) { - if (req->r_dentry->d_inode) { + struct inode *dir = req->r_dentry->d_parent->d_inode; + + if (dir->i_sb != mdsc->client->sb) { + /* not this fs! */ + inode = req->r_dentry->d_inode; + } else if (ceph_snap(dir) != CEPH_NOSNAP) { + /* direct snapped/virtual snapdir requests + * based on parent dir inode */ + struct dentry *dn = + get_nonsnap_parent(req->r_dentry->d_parent); + inode = dn->d_inode; + dout("__choose_mds using nonsnap parent %p\n", inode); + } else if (req->r_dentry->d_inode) { + /* dentry target */ inode = req->r_dentry->d_inode; } else { - inode = req->r_dentry->d_parent->d_inode; + /* dir + name */ + inode = dir; hash = req->r_dentry->d_name.hash; is_hash = true; } } + dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, (int)hash, mode); if (!inode) @@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session, pr_info("mds%d reconnect denied\n", session->s_mds); remove_session_caps(session); wake = 1; /* for good measure */ - complete_all(&mdsc->session_close_waiters); + wake_up_all(&mdsc->session_close_wq); kick_requests(mdsc, mds); break; @@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) { err = PTR_ERR(path); - BUG_ON(err); + goto out_dput; } } else { path = NULL; @@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, } err = ceph_pagelist_encode_string(pagelist, path, pathlen); if (err) - goto out; + goto out_free; spin_lock(&inode->i_lock); cap->seq = 0; /* reset cap seq */ @@ -2352,10 +2374,13 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, num_fcntl_locks, num_flock_locks); unlock_kernel(); + } else { + err = ceph_pagelist_append(pagelist, &rec, reclen); } -out: +out_free: kfree(path); +out_dput: dput(dentry); return err; } @@ -2876,7 +2901,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) return -ENOMEM; init_completion(&mdsc->safe_umount_waiters); - init_completion(&mdsc->session_close_waiters); + init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); mdsc->sessions = NULL; mdsc->max_sessions = 0; @@ -3021,6 +3046,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); } +/* + * true if all sessions are closed, or we force unmount + */ +bool done_closing_sessions(struct ceph_mds_client *mdsc) +{ + int i, n = 0; + + if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) + return true; + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) + if (mdsc->sessions[i]) + n++; + mutex_unlock(&mdsc->mutex); + return n == 0; +} /* * called after sb is ro. @@ -3029,45 +3071,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_mds_session *session; int i; - int n; struct ceph_client *client = mdsc->client; - unsigned long started, timeout = client->mount_args->mount_timeout * HZ; + unsigned long timeout = client->mount_args->mount_timeout * HZ; dout("close_sessions\n"); - mutex_lock(&mdsc->mutex); - /* close sessions */ - started = jiffies; - while (time_before(jiffies, started + timeout)) { - dout("closing sessions\n"); - n = 0; - for (i = 0; i < mdsc->max_sessions; i++) { - session = __ceph_lookup_mds_session(mdsc, i); - if (!session) - continue; - mutex_unlock(&mdsc->mutex); - mutex_lock(&session->s_mutex); - __close_session(mdsc, session); - mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); - mutex_lock(&mdsc->mutex); - n++; - } - if (n == 0) - break; - - if (client->mount_state == CEPH_MOUNT_SHUTDOWN) - break; - - dout("waiting for sessions to close\n"); + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + session = __ceph_lookup_mds_session(mdsc, i); + if (!session) + continue; mutex_unlock(&mdsc->mutex); - wait_for_completion_timeout(&mdsc->session_close_waiters, - timeout); + mutex_lock(&session->s_mutex); + __close_session(mdsc, session); + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); mutex_lock(&mdsc->mutex); } + mutex_unlock(&mdsc->mutex); + + dout("waiting for sessions to close\n"); + wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), + timeout); /* tear down remaining sessions */ + mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { if (mdsc->sessions[i]) { session = get_session(mdsc->sessions[i]); @@ -3080,9 +3109,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) mutex_lock(&mdsc->mutex); } } - WARN_ON(!list_empty(&mdsc->cap_delay_list)); - mutex_unlock(&mdsc->mutex); ceph_cleanup_empty_realms(mdsc); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ab7e89f5e34..c98267ce6d2 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -234,7 +234,8 @@ struct ceph_mds_client { struct mutex mutex; /* all nested structures */ struct ceph_mdsmap *mdsmap; - struct completion safe_umount_waiters, session_close_waiters; + struct completion safe_umount_waiters; + wait_queue_head_t session_close_wq; struct list_head waiting_for_map; struct ceph_mds_session **sessions; /* NULL for mds if no session */ diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index bed6391e52c..3b5571b8ce2 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, */ static void __cancel_request(struct ceph_osd_request *req) { - if (req->r_sent) { + if (req->r_sent && req->r_osd) { ceph_con_revoke(&req->r_osd->o_con, req->r_request); req->r_sent = 0; } @@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc, reqhead->reassert_version = req->r_reassert_version; req->r_stamp = jiffies; - list_move_tail(&osdc->req_lru, &req->r_req_lru_item); + list_move_tail(&req->r_req_lru_item, &osdc->req_lru); ceph_msg_get(req->r_request); /* send consumes a ref */ ceph_con_send(&req->r_osd->o_con, req->r_request); diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d36..46a368b6dce 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -5,10 +5,18 @@ #include "pagelist.h" +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) +{ + struct page *page = list_entry(pl->head.prev, struct page, + lru); + kunmap(page); +} + int ceph_pagelist_release(struct ceph_pagelist *pl) { if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); + while (!list_empty(&pl->head)) { struct page *page = list_first_entry(&pl->head, struct page, lru); @@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) pl->room += PAGE_SIZE; list_add_tail(&page->lru, &pl->head); if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); pl->mapped_tail = kmap(page); return 0; } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c0b26b6badb..190b6c4a6f2 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); + INIT_LIST_HEAD(&realm->dirty_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); @@ -435,7 +436,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; - int used; + int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); if (!capsnap) { @@ -445,6 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) spin_lock(&inode->i_lock); used = __ceph_caps_used(ci); + dirty = __ceph_caps_dirty(ci); if (__ceph_have_pending_cap_snap(ci)) { /* there is no point in queuing multiple "pending" cap_snaps, as no new writes are allowed to start when pending, so any @@ -452,27 +454,37 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); - } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { + } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || + (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| + CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { struct ceph_snap_context *snapc = ci->i_head_snapc; + dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, + capsnap, snapc); igrab(inode); - + atomic_set(&capsnap->nref, 1); capsnap->ci = ci; INIT_LIST_HEAD(&capsnap->ci_item); INIT_LIST_HEAD(&capsnap->flushing_item); - capsnap->follows = snapc->seq - 1; + capsnap->follows = snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); - capsnap->dirty = __ceph_caps_dirty(ci); + capsnap->dirty = dirty; capsnap->mode = inode->i_mode; capsnap->uid = inode->i_uid; capsnap->gid = inode->i_gid; - /* fixme? */ - capsnap->xattr_blob = NULL; - capsnap->xattr_len = 0; + if (dirty & CEPH_CAP_XATTR_EXCL) { + __ceph_build_xattrs_blob(ci); + capsnap->xattr_blob = + ceph_buffer_get(ci->i_xattrs.blob); + capsnap->xattr_version = ci->i_xattrs.version; + } else { + capsnap->xattr_blob = NULL; + capsnap->xattr_version = 0; + } /* dirty page count moved from _head to this cap_snap; all subsequent writes page dirties occur _after_ this @@ -480,7 +492,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->dirty_pages = ci->i_wrbuffer_ref_head; ci->i_wrbuffer_ref_head = 0; capsnap->context = snapc; - ci->i_head_snapc = NULL; + ci->i_head_snapc = + ceph_get_snap_context(ci->i_snap_realm->cached_context); + dout(" new snapc is %p\n", ci->i_head_snapc); list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { @@ -539,6 +553,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, return 1; /* caller may want to ceph_flush_snaps */ } +/* + * Queue cap_snaps for snap writeback for this realm and its children. + * Called under snap_rwsem, so realm topology won't change. + */ +static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) +{ + struct ceph_inode_info *ci; + struct inode *lastinode = NULL; + struct ceph_snap_realm *child; + + dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino); + + spin_lock(&realm->inodes_with_caps_lock); + list_for_each_entry(ci, &realm->inodes_with_caps, + i_snap_realm_item) { + struct inode *inode = igrab(&ci->vfs_inode); + if (!inode) + continue; + spin_unlock(&realm->inodes_with_caps_lock); + if (lastinode) + iput(lastinode); + lastinode = inode; + ceph_queue_cap_snap(ci); + spin_lock(&realm->inodes_with_caps_lock); + } + spin_unlock(&realm->inodes_with_caps_lock); + if (lastinode) + iput(lastinode); + + dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); + list_for_each_entry(child, &realm->children, child_item) + queue_realm_cap_snaps(child); + + dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); +} /* * Parse and apply a snapblob "snap trace" from the MDS. This specifies @@ -556,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; int invalidate = 0; int err = -ENOMEM; + LIST_HEAD(dirty_realms); dout("update_snap_trace deletion=%d\n", deletion); more: @@ -578,45 +628,6 @@ more: } } - if (le64_to_cpu(ri->seq) > realm->seq) { - dout("update_snap_trace updating %llx %p %lld -> %lld\n", - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); - /* - * if the realm seq has changed, queue a cap_snap for every - * inode with open caps. we do this _before_ we update - * the realm info so that we prepare for writeback under the - * _previous_ snap context. - * - * ...unless it's a snap deletion! - */ - if (!deletion) { - struct ceph_inode_info *ci; - struct inode *lastinode = NULL; - - spin_lock(&realm->inodes_with_caps_lock); - list_for_each_entry(ci, &realm->inodes_with_caps, - i_snap_realm_item) { - struct inode *inode = igrab(&ci->vfs_inode); - if (!inode) - continue; - spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); - lastinode = inode; - ceph_queue_cap_snap(ci); - spin_lock(&realm->inodes_with_caps_lock); - } - spin_unlock(&realm->inodes_with_caps_lock); - if (lastinode) - iput(lastinode); - dout("update_snap_trace cap_snaps queued\n"); - } - - } else { - dout("update_snap_trace %llx %p seq %lld unchanged\n", - realm->ino, realm, realm->seq); - } - /* ensure the parent is correct */ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) @@ -624,6 +635,8 @@ more: invalidate += err; if (le64_to_cpu(ri->seq) > realm->seq) { + dout("update_snap_trace updating %llx %p %lld -> %lld\n", + realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -641,9 +654,17 @@ more: if (err < 0) goto fail; + /* queue realm for cap_snap creation */ + list_add(&realm->dirty_item, &dirty_realms); + invalidate = 1; } else if (!realm->cached_context) { + dout("update_snap_trace %llx %p seq %lld new\n", + realm->ino, realm, realm->seq); invalidate = 1; + } else { + dout("update_snap_trace %llx %p seq %lld unchanged\n", + realm->ino, realm, realm->seq); } dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, @@ -656,6 +677,14 @@ more: if (invalidate) rebuild_snap_realms(realm); + /* + * queue cap snaps _after_ we've built the new snap contexts, + * so that i_head_snapc can be set appropriately. + */ + list_for_each_entry(realm, &dirty_realms, dirty_item) { + queue_realm_cap_snaps(realm); + } + __cleanup_empty_realms(mdsc); return 0; @@ -688,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) igrab(inode); spin_unlock(&mdsc->snap_flush_lock); spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); spin_unlock(&inode->i_lock); iput(inode); spin_lock(&mdsc->snap_flush_lock); @@ -789,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, }; struct inode *inode = ceph_find_inode(sb, vino); struct ceph_inode_info *ci; + struct ceph_snap_realm *oldrealm; if (!inode) continue; @@ -814,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, dout(" will move %p to split realm %llx %p\n", inode, realm->ino, realm); /* - * Remove the inode from the realm's inode - * list, but don't add it to the new realm - * yet. We don't want the cap_snap to be - * queued (again) by ceph_update_snap_trace() - * below. Queue it _now_, under the old context. + * Move the inode to the new realm */ spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + list_add(&ci->i_snap_realm_item, + &realm->inodes_with_caps); + oldrealm = ci->i_snap_realm; + ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); - ceph_queue_cap_snap(ci); + ceph_get_snap_realm(mdsc, realm); + ceph_put_snap_realm(mdsc, oldrealm); iput(inode); continue; @@ -853,43 +884,9 @@ skip_inode: ceph_update_snap_trace(mdsc, p, e, op == CEPH_SNAP_OP_DESTROY); - if (op == CEPH_SNAP_OP_SPLIT) { - /* - * ok, _now_ add the inodes into the new realm. - */ - for (i = 0; i < num_split_inos; i++) { - struct ceph_vino vino = { - .ino = le64_to_cpu(split_inos[i]), - .snap = CEPH_NOSNAP, - }; - struct inode *inode = ceph_find_inode(sb, vino); - struct ceph_inode_info *ci; - - if (!inode) - continue; - ci = ceph_inode(inode); - spin_lock(&inode->i_lock); - if (list_empty(&ci->i_snap_realm_item)) { - struct ceph_snap_realm *oldrealm = - ci->i_snap_realm; - - dout(" moving %p to split realm %llx %p\n", - inode, realm->ino, realm); - spin_lock(&realm->inodes_with_caps_lock); - list_add(&ci->i_snap_realm_item, - &realm->inodes_with_caps); - ci->i_snap_realm = realm; - spin_unlock(&realm->inodes_with_caps_lock); - ceph_get_snap_realm(mdsc, realm); - ceph_put_snap_realm(mdsc, oldrealm); - } - spin_unlock(&inode->i_lock); - iput(inode); - } - + if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ ceph_put_snap_realm(mdsc, realm); - } __cleanup_empty_realms(mdsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2482d696f0d..b87638e84c4 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -216,8 +216,7 @@ struct ceph_cap_snap { uid_t uid; gid_t gid; - void *xattr_blob; - int xattr_len; + struct ceph_buffer *xattr_blob; u64 xattr_version; u64 size; @@ -229,8 +228,11 @@ struct ceph_cap_snap { static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) { - if (atomic_dec_and_test(&capsnap->nref)) + if (atomic_dec_and_test(&capsnap->nref)) { + if (capsnap->xattr_blob) + ceph_buffer_put(capsnap->xattr_blob); kfree(capsnap); + } } /* @@ -342,7 +344,8 @@ struct ceph_inode_info { unsigned i_cap_exporting_issued; struct ceph_cap_reservation i_cap_migration_resv; struct list_head i_cap_snaps; /* snapped state pending flush to mds */ - struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ + struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or + dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ @@ -687,6 +690,8 @@ struct ceph_snap_realm { struct list_head empty_item; /* if i have ref==0 */ + struct list_head dirty_item; /* if realm needs new context */ + /* the current set of snaps for this realm */ struct ceph_snap_context *cached_context; @@ -823,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession); + struct ceph_mds_session **psession, + int again); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097a2654c00..9578af610b7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = NULL; ci->i_xattrs.dirty = false; + ci->i_xattrs.version++; } } |