aboutsummaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
authorJens Axboe <jaxboe@fusionio.com>2010-10-19 09:13:04 +0200
committerJens Axboe <jaxboe@fusionio.com>2010-10-19 09:13:04 +0200
commitfa251f89903d73989e2f63e13d0eaed1e07ce0da (patch)
tree3f7fe779941e3b6d67754dd7c44a32f48ea47c74 /fs/ceph
parentdd3932eddf428571762596e17b65f5dc92ca361b (diff)
parentcd07202cc8262e1669edff0d97715f3dd9260917 (diff)
Merge branch 'v2.6.36-rc8' into for-2.6.37/barrier
Conflicts: block/blk-core.c drivers/block/loop.c mm/swapfile.c Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/Kconfig1
-rw-r--r--fs/ceph/addr.c19
-rw-r--r--fs/ceph/auth_x.c15
-rw-r--r--fs/ceph/caps.c90
-rw-r--r--fs/ceph/debugfs.c4
-rw-r--r--fs/ceph/dir.c12
-rw-r--r--fs/ceph/export.c21
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/locks.c14
-rw-r--r--fs/ceph/mds_client.c103
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/osd_client.c4
-rw-r--r--fs/ceph/pagelist.c12
-rw-r--r--fs/ceph/snap.c177
-rw-r--r--fs/ceph/super.h16
-rw-r--r--fs/ceph/xattr.c1
17 files changed, 299 insertions, 211 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27..0fcd2640c23 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
depends on INET && EXPERIMENTAL
select LIBCRC32C
select CRYPTO_AES
+ select CRYPTO
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5598a0d0229..efbc604001c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
/* dirty the head */
spin_lock(&inode->i_lock);
- if (ci->i_wrbuffer_ref_head == 0)
+ if (ci->i_head_snapc == NULL)
ci->i_head_snapc = ceph_get_snap_context(snapc);
++ci->i_wrbuffer_ref_head;
if (ci->i_wrbuffer_ref == 0)
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page)
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(!PageUptodate(page));
-
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
+ account_page_dirtied(page, page->mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
break;
}
}
- if (!snapc && ci->i_head_snapc) {
+ if (!snapc && ci->i_wrbuffer_ref_head) {
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
@@ -417,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (i_size < page_off + len)
len = i_size - page_off;
- dout("writepage %p page %p index %lu on %llu~%u\n",
- inode, page, page->index, page_off, len);
+ dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+ inode, page, page->index, page_off, len, snapc);
writeback_stat = atomic_long_inc_return(&client->writeback_count);
if (writeback_stat >
@@ -772,7 +766,8 @@ get_more_pages:
/* ok */
if (locked_pages == 0) {
/* prepare async write request */
- offset = page->index << PAGE_CACHE_SHIFT;
+ offset = (unsigned long long)page->index
+ << PAGE_CACHE_SHIFT;
len = wsize;
req = ceph_osdc_new_request(&client->osdc,
&ci->i_layout,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 582e0b2caf8..a2d002cbdec 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
th = get_ticket_handler(ac, service);
- if (!th) {
+ if (IS_ERR(th)) {
*pneed |= service;
continue;
}
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th =
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
ceph_x_validate_tickets(ac, &need);
dout("build_request want %x have %x need %x\n",
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
return -ERANGE;
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
- BUG_ON(!th);
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
if (ret)
return ret;
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
case CEPHX_GET_PRINCIPAL_SESSION_KEY:
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
- BUG_ON(!th);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
buf + sizeof(*head), end);
break;
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
void *end = p + sizeof(au->reply_buf);
th = get_ticket_handler(ac, au->service);
- if (!th)
- return -EIO; /* hrm! */
+ if (IS_ERR(th))
+ return PTR_ERR(th);
ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
if (ret < 0)
return ret;
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
- if (th && !IS_ERR(th))
+ if (!IS_ERR(th))
remove_ticket_handler(ac, th);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bf182b0397..5e9da996a15 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
used |= CEPH_CAP_PIN;
if (ci->i_rd_ref)
used |= CEPH_CAP_FILE_RD;
- if (ci->i_rdcache_ref || ci->i_rdcache_gen)
+ if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
used |= CEPH_CAP_FILE_CACHE;
if (ci->i_wr_ref)
used |= CEPH_CAP_FILE_WR;
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
gid_t gid;
struct ceph_mds_session *session;
u64 xattr_version = 0;
+ struct ceph_buffer *xattr_blob = NULL;
int delayed = 0;
u64 flush_tid = 0;
int i;
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
for (i = 0; i < CEPH_CAP_BITS; i++)
if (flushing & (1 << i))
ci->i_cap_flush_tid[i] = flush_tid;
+
+ follows = ci->i_head_snapc->seq;
+ } else {
+ follows = 0;
}
keep = cap->implemented;
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
mtime = inode->i_mtime;
atime = inode->i_atime;
time_warp_seq = ci->i_time_warp_seq;
- follows = ci->i_snap_realm->cached_context->seq;
uid = inode->i_uid;
gid = inode->i_gid;
mode = inode->i_mode;
- if (dropping & CEPH_CAP_XATTR_EXCL) {
+ if (flushing & CEPH_CAP_XATTR_EXCL) {
__ceph_build_xattrs_blob(ci);
- xattr_version = ci->i_xattrs.version + 1;
+ xattr_blob = ci->i_xattrs.blob;
+ xattr_version = ci->i_xattrs.version;
}
spin_unlock(&inode->i_lock);
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
- uid, gid, mode,
- xattr_version,
- (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL,
+ uid, gid, mode, xattr_version, xattr_blob,
follows);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
@@ -1192,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
* asynchronously back to the MDS once sync writes complete and dirty
* data is written out.
*
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
* Called under i_lock. Takes s_mutex as needed.
*/
void __ceph_flush_snaps(struct ceph_inode_info *ci,
- struct ceph_mds_session **psession)
+ struct ceph_mds_session **psession,
+ int again)
__releases(ci->vfs_inode->i_lock)
__acquires(ci->vfs_inode->i_lock)
{
@@ -1224,7 +1231,7 @@ retry:
* pages to be written out.
*/
if (capsnap->dirty_pages || capsnap->writing)
- continue;
+ break;
/*
* if cap writeback already occurred, we should have dropped
@@ -1237,6 +1244,13 @@ retry:
dout("no auth cap (migrating?), doing nothing\n");
goto out;
}
+
+ /* only flush each capsnap once */
+ if (!again && !list_empty(&capsnap->flushing_item)) {
+ dout("already flushed %p, skipping\n", capsnap);
+ continue;
+ }
+
mds = ci->i_auth_cap->session->s_mds;
mseq = ci->i_auth_cap->mseq;
@@ -1273,8 +1287,8 @@ retry:
&session->s_cap_snaps_flushing);
spin_unlock(&inode->i_lock);
- dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
- inode, capsnap, next_follows, capsnap->size);
+ dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+ inode, capsnap, capsnap->follows, capsnap->flush_tid);
send_cap_msg(session, ceph_vino(inode).ino, 0,
CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1282,7 +1296,7 @@ retry:
&capsnap->mtime, &capsnap->atime,
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
- 0, NULL,
+ capsnap->xattr_version, capsnap->xattr_blob,
capsnap->follows);
next_follows = capsnap->follows + 1;
@@ -1311,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
struct inode *inode = &ci->vfs_inode;
spin_lock(&inode->i_lock);
- __ceph_flush_snaps(ci, NULL);
+ __ceph_flush_snaps(ci, NULL, 0);
spin_unlock(&inode->i_lock);
}
@@ -1332,7 +1346,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ceph_cap_string(was | mask));
ci->i_dirty_caps |= mask;
if (was == 0) {
- dout(" inode %p now dirty\n", &ci->vfs_inode);
+ if (!ci->i_head_snapc)
+ ci->i_head_snapc = ceph_get_snap_context(
+ ci->i_snap_realm->cached_context);
+ dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
+ ci->i_head_snapc);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1470,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
/* flush snaps first time around only */
if (!list_empty(&ci->i_cap_snaps))
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 0);
goto retry_locked;
retry:
spin_lock(&inode->i_lock);
@@ -1887,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
if (cap && cap->session == session) {
dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
cap, capsnap);
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 1);
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
@@ -2190,7 +2208,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
if (ci->i_head_snapc == snapc) {
ci->i_wrbuffer_ref_head -= nr;
- if (!ci->i_wrbuffer_ref_head) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
+ BUG_ON(!ci->i_head_snapc);
ceph_put_snap_context(ci->i_head_snapc);
ci->i_head_snapc = NULL;
}
@@ -2263,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
- int seq = le32_to_cpu(grant->seq);
+ unsigned seq = le32_to_cpu(grant->seq);
+ unsigned issue_seq = le32_to_cpu(grant->issue_seq);
int newcaps = le32_to_cpu(grant->caps);
int issued, implemented, used, wanted, dirty;
u64 size = le64_to_cpu(grant->size);
@@ -2275,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
int revoked_rdcache = 0;
int queue_invalidate = 0;
- dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
- inode, cap, mds, seq, ceph_cap_string(newcaps));
+ dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
+ inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
inode->i_size);
@@ -2372,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
}
cap->seq = seq;
+ cap->issue_seq = issue_seq;
/* file layout may have changed */
ci->i_layout = grant->layout;
@@ -2483,6 +2505,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
dout(" inode %p now clean\n", inode);
BUG_ON(!list_empty(&ci->i_dirty_item));
drop = 1;
+ if (ci->i_wrbuffer_ref_head == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
} else {
BUG_ON(list_empty(&ci->i_dirty_item));
}
@@ -2749,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (op == CEPH_CAP_OP_IMPORT)
__queue_cap_release(session, vino.ino, cap_id,
mseq, seq);
-
- /*
- * send any full release message to try to move things
- * along for the mds (who clearly thinks we still have this
- * cap).
- */
- ceph_add_cap_releases(mdsc, session);
- ceph_send_cap_releases(mdsc, session);
- goto done;
+ goto flush_cap_releases;
}
/* these will work even if we don't have a cap yet */
@@ -2785,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
- goto done;
+ goto flush_cap_releases;
}
/* note that each of these drops i_lock for us */
@@ -2809,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
ceph_cap_op_name(op));
}
+ goto done;
+
+flush_cap_releases:
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session);
+ ceph_send_cap_releases(mdsc, session);
+
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 360c4f22718..6fd8b20a861 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p)
} else if (req->r_dentry) {
path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
&pathbase, 0);
+ if (IS_ERR(path))
+ path = NULL;
spin_lock(&req->r_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
ceph_ino(req->r_dentry->d_parent->d_inode),
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p)
if (req->r_old_dentry) {
path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
&pathbase, 0);
+ if (IS_ERR(path))
+ path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
ceph_ino(req->r_old_dentry->d_parent->d_inode),
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 67bbb41d552..a1986eb5204 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry)
else
dentry->d_op = &ceph_snap_dentry_ops;
- di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS);
+ di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
return -ENOMEM; /* oh well */
@@ -1021,11 +1021,15 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = dentry->d_parent->d_inode;
- u64 snapid = ceph_snap(parent_inode);
+ struct inode *parent_inode = NULL;
+ u64 snapid = CEPH_NOSNAP;
+ if (!IS_ROOT(dentry)) {
+ parent_inode = dentry->d_parent->d_inode;
+ if (parent_inode)
+ snapid = ceph_snap(parent_inode);
+ }
dout("dentry_release %p parent %p\n", dentry, parent_inode);
-
if (parent_inode && snapid != CEPH_SNAPDIR) {
struct ceph_inode_info *ci = ceph_inode(parent_inode);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 4480cb1c63e..e38423e82f2 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -42,32 +42,37 @@ struct ceph_nfs_confh {
static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
int connectable)
{
+ int type;
struct ceph_nfs_fh *fh = (void *)rawfh;
struct ceph_nfs_confh *cfh = (void *)rawfh;
struct dentry *parent = dentry->d_parent;
struct inode *inode = dentry->d_inode;
- int type;
+ int connected_handle_length = sizeof(*cfh)/4;
+ int handle_length = sizeof(*fh)/4;
/* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL;
- if (*max_len >= sizeof(*cfh)) {
+ if (*max_len >= connected_handle_length) {
dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode);
cfh->parent_name_hash = parent->d_name.hash;
- *max_len = sizeof(*cfh);
+ *max_len = connected_handle_length;
type = 2;
- } else if (*max_len > sizeof(*fh)) {
- if (connectable)
- return -ENOSPC;
+ } else if (*max_len >= handle_length) {
+ if (connectable) {
+ *max_len = connected_handle_length;
+ return 255;
+ }
dout("encode_fh %p\n", dentry);
fh->ino = ceph_ino(dentry->d_inode);
- *max_len = sizeof(*fh);
+ *max_len = handle_length;
type = 1;
} else {
- return -ENOSPC;
+ *max_len = handle_length;
+ return 255;
}
return type;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8c044a4f045..66e4da6dba2 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -697,7 +697,7 @@ more:
* start_request so that a tid has been assigned.
*/
spin_lock(&ci->i_unsafe_lock);
- list_add(&ci->i_unsafe_writes, &req->r_unsafe_item);
+ list_add(&req->r_unsafe_item, &ci->i_unsafe_writes);
spin_unlock(&ci->i_unsafe_lock);
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5d893d31e39..62377ec37ed 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode,
if (ci->i_files == 0 && ci->i_subdirs == 0 &&
ceph_snap(inode) == CEPH_NOSNAP &&
(le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+ (issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
ci->i_ceph_flags |= CEPH_I_COMPLETE;
@@ -844,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
* the caller) if we fail.
*/
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
- bool *prehash)
+ bool *prehash, bool set_offset)
{
struct dentry *realdn;
@@ -876,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
}
if ((!prehash || *prehash) && d_unhashed(dn))
d_rehash(dn);
- ceph_set_dentry_offset(dn);
+ if (set_offset)
+ ceph_set_dentry_offset(dn);
out:
return dn;
}
@@ -1061,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
d_delete(dn);
goto done;
}
- dn = splice_dentry(dn, in, &have_lease);
+ dn = splice_dentry(dn, in, &have_lease, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1104,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
dout(" linking snapped dir %p to dn %p\n", in, dn);
- dn = splice_dentry(dn, in, NULL);
+ dn = splice_dentry(dn, in, NULL, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1229,14 +1231,14 @@ retry_lookup:
in = dn->d_inode;
} else {
in = ceph_get_inode(parent->d_sb, vino);
- if (in == NULL) {
+ if (IS_ERR(in)) {
dout("new_inode badness\n");
d_delete(dn);
dput(dn);
- err = -ENOMEM;
+ err = PTR_ERR(in);
goto out;
}
- dn = splice_dentry(dn, in, NULL);
+ dn = splice_dentry(dn, in, NULL, false);
if (IS_ERR(dn))
dn = NULL;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae85af06454..ff4e753aae9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
if (!err) {
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
/* undo! This should only happen if the kernel detects
* local deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
dout("got %d on posix_lock_file, undid lock", err);
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ file, (u64)fl->fl_pid,
+ (u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
if (!err) {
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
file, (u64)fl->fl_pid,
- (u64)fl->fl_nspid,
+ (u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
dout("got %d on flock_lock_file_wait, undid lock", err);
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock,
cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
cephlock->client = cpu_to_le64(0);
cephlock->pid = cpu_to_le64(lock->fl_pid);
- cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid);
+ cephlock->pid_namespace =
+ cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
switch (lock->fl_type) {
case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a75ddbf9fe3..fad95f8f260 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
+struct dentry *get_nonsnap_parent(struct dentry *dentry)
+{
+ while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ dentry = dentry->d_parent;
+ return dentry;
+}
+
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) {
inode = req->r_inode;
} else if (req->r_dentry) {
- if (req->r_dentry->d_inode) {
+ struct inode *dir = req->r_dentry->d_parent->d_inode;
+
+ if (dir->i_sb != mdsc->client->sb) {
+ /* not this fs! */
+ inode = req->r_dentry->d_inode;
+ } else if (ceph_snap(dir) != CEPH_NOSNAP) {
+ /* direct snapped/virtual snapdir requests
+ * based on parent dir inode */
+ struct dentry *dn =
+ get_nonsnap_parent(req->r_dentry->d_parent);
+ inode = dn->d_inode;
+ dout("__choose_mds using nonsnap parent %p\n", inode);
+ } else if (req->r_dentry->d_inode) {
+ /* dentry target */
inode = req->r_dentry->d_inode;
} else {
- inode = req->r_dentry->d_parent->d_inode;
+ /* dir + name */
+ inode = dir;
hash = req->r_dentry->d_name.hash;
is_hash = true;
}
}
+
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
(int)hash, mode);
if (!inode)
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session,
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
wake = 1; /* for good measure */
- complete_all(&mdsc->session_close_waiters);
+ wake_up_all(&mdsc->session_close_wq);
kick_requests(mdsc, mds);
break;
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- BUG_ON(err);
+ goto out_dput;
}
} else {
path = NULL;
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
err = ceph_pagelist_encode_string(pagelist, path, pathlen);
if (err)
- goto out;
+ goto out_free;
spin_lock(&inode->i_lock);
cap->seq = 0; /* reset cap seq */
@@ -2352,10 +2374,13 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
num_fcntl_locks,
num_flock_locks);
unlock_kernel();
+ } else {
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
}
-out:
+out_free:
kfree(path);
+out_dput:
dput(dentry);
return err;
}
@@ -2876,7 +2901,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
return -ENOMEM;
init_completion(&mdsc->safe_umount_waiters);
- init_completion(&mdsc->session_close_waiters);
+ init_waitqueue_head(&mdsc->session_close_wq);
INIT_LIST_HEAD(&mdsc->waiting_for_map);
mdsc->sessions = NULL;
mdsc->max_sessions = 0;
@@ -3021,6 +3046,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
}
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+bool done_closing_sessions(struct ceph_mds_client *mdsc)
+{
+ int i, n = 0;
+
+ if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+ return true;
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++)
+ if (mdsc->sessions[i])
+ n++;
+ mutex_unlock(&mdsc->mutex);
+ return n == 0;
+}
/*
* called after sb is ro.
@@ -3029,45 +3071,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
struct ceph_mds_session *session;
int i;
- int n;
struct ceph_client *client = mdsc->client;
- unsigned long started, timeout = client->mount_args->mount_timeout * HZ;
+ unsigned long timeout = client->mount_args->mount_timeout * HZ;
dout("close_sessions\n");
- mutex_lock(&mdsc->mutex);
-
/* close sessions */
- started = jiffies;
- while (time_before(jiffies, started + timeout)) {
- dout("closing sessions\n");
- n = 0;
- for (i = 0; i < mdsc->max_sessions; i++) {
- session = __ceph_lookup_mds_session(mdsc, i);
- if (!session)
- continue;
- mutex_unlock(&mdsc->mutex);
- mutex_lock(&session->s_mutex);
- __close_session(mdsc, session);
- mutex_unlock(&session->s_mutex);
- ceph_put_mds_session(session);
- mutex_lock(&mdsc->mutex);
- n++;
- }
- if (n == 0)
- break;
-
- if (client->mount_state == CEPH_MOUNT_SHUTDOWN)
- break;
-
- dout("waiting for sessions to close\n");
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
mutex_unlock(&mdsc->mutex);
- wait_for_completion_timeout(&mdsc->session_close_waiters,
- timeout);
+ mutex_lock(&session->s_mutex);
+ __close_session(mdsc, session);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
mutex_lock(&mdsc->mutex);
}
+ mutex_unlock(&mdsc->mutex);
+
+ dout("waiting for sessions to close\n");
+ wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
+ timeout);
/* tear down remaining sessions */
+ mutex_lock(&mdsc->mutex);
for (i = 0; i < mdsc->max_sessions; i++) {
if (mdsc->sessions[i]) {
session = get_session(mdsc->sessions[i]);
@@ -3080,9 +3109,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
mutex_lock(&mdsc->mutex);
}
}
-
WARN_ON(!list_empty(&mdsc->cap_delay_list));
-
mutex_unlock(&mdsc->mutex);
ceph_cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ab7e89f5e34..c98267ce6d2 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -234,7 +234,8 @@ struct ceph_mds_client {
struct mutex mutex; /* all nested structures */
struct ceph_mdsmap *mdsmap;
- struct completion safe_umount_waiters, session_close_waiters;
+ struct completion safe_umount_waiters;
+ wait_queue_head_t session_close_wq;
struct list_head waiting_for_map;
struct ceph_mds_session **sessions; /* NULL for mds if no session */
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index bed6391e52c..3b5571b8ce2 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
*/
static void __cancel_request(struct ceph_osd_request *req)
{
- if (req->r_sent) {
+ if (req->r_sent && req->r_osd) {
ceph_con_revoke(&req->r_osd->o_con, req->r_request);
req->r_sent = 0;
}
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc,
reqhead->reassert_version = req->r_reassert_version;
req->r_stamp = jiffies;
- list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
+ list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
ceph_msg_get(req->r_request); /* send consumes a ref */
ceph_con_send(&req->r_osd->o_con, req->r_request);
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d36..46a368b6dce 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
#include "pagelist.h"
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+ struct page *page = list_entry(pl->head.prev, struct page,
+ lru);
+ kunmap(page);
+}
+
int ceph_pagelist_release(struct ceph_pagelist *pl)
{
if (pl->mapped_tail)
- kunmap(pl->mapped_tail);
+ ceph_pagelist_unmap_tail(pl);
+
while (!list_empty(&pl->head)) {
struct page *page = list_first_entry(&pl->head, struct page,
lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
pl->room += PAGE_SIZE;
list_add_tail(&page->lru, &pl->head);
if (pl->mapped_tail)
- kunmap(pl->mapped_tail);
+ ceph_pagelist_unmap_tail(pl);
pl->mapped_tail = kmap(page);
return 0;
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c0b26b6badb..190b6c4a6f2 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
INIT_LIST_HEAD(&realm->children);
INIT_LIST_HEAD(&realm->child_item);
INIT_LIST_HEAD(&realm->empty_item);
+ INIT_LIST_HEAD(&realm->dirty_item);
INIT_LIST_HEAD(&realm->inodes_with_caps);
spin_lock_init(&realm->inodes_with_caps_lock);
__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -435,7 +436,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap;
- int used;
+ int used, dirty;
capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
if (!capsnap) {
@@ -445,6 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
if (__ceph_have_pending_cap_snap(ci)) {
/* there is no point in queuing multiple "pending" cap_snaps,
as no new writes are allowed to start when pending, so any
@@ -452,27 +454,37 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
- } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+ } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
+ (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
+ CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
+ dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
+ capsnap, snapc);
igrab(inode);
-
+
atomic_set(&capsnap->nref, 1);
capsnap->ci = ci;
INIT_LIST_HEAD(&capsnap->ci_item);
INIT_LIST_HEAD(&capsnap->flushing_item);
- capsnap->follows = snapc->seq - 1;
+ capsnap->follows = snapc->seq;
capsnap->issued = __ceph_caps_issued(ci, NULL);
- capsnap->dirty = __ceph_caps_dirty(ci);
+ capsnap->dirty = dirty;
capsnap->mode = inode->i_mode;
capsnap->uid = inode->i_uid;
capsnap->gid = inode->i_gid;
- /* fixme? */
- capsnap->xattr_blob = NULL;
- capsnap->xattr_len = 0;
+ if (dirty & CEPH_CAP_XATTR_EXCL) {
+ __ceph_build_xattrs_blob(ci);
+ capsnap->xattr_blob =
+ ceph_buffer_get(ci->i_xattrs.blob);
+ capsnap->xattr_version = ci->i_xattrs.version;
+ } else {
+ capsnap->xattr_blob = NULL;
+ capsnap->xattr_version = 0;
+ }
/* dirty page count moved from _head to this cap_snap;
all subsequent writes page dirties occur _after_ this
@@ -480,7 +492,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
ci->i_wrbuffer_ref_head = 0;
capsnap->context = snapc;
- ci->i_head_snapc = NULL;
+ ci->i_head_snapc =
+ ceph_get_snap_context(ci->i_snap_realm->cached_context);
+ dout(" new snapc is %p\n", ci->i_head_snapc);
list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
if (used & CEPH_CAP_FILE_WR) {
@@ -539,6 +553,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
return 1; /* caller may want to ceph_flush_snaps */
}
+/*
+ * Queue cap_snaps for snap writeback for this realm and its children.
+ * Called under snap_rwsem, so realm topology won't change.
+ */
+static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
+{
+ struct ceph_inode_info *ci;
+ struct inode *lastinode = NULL;
+ struct ceph_snap_realm *child;
+
+ dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
+
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_for_each_entry(ci, &realm->inodes_with_caps,
+ i_snap_realm_item) {
+ struct inode *inode = igrab(&ci->vfs_inode);
+ if (!inode)
+ continue;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ if (lastinode)
+ iput(lastinode);
+ lastinode = inode;
+ ceph_queue_cap_snap(ci);
+ spin_lock(&realm->inodes_with_caps_lock);
+ }
+ spin_unlock(&realm->inodes_with_caps_lock);
+ if (lastinode)
+ iput(lastinode);
+
+ dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
+ list_for_each_entry(child, &realm->children, child_item)
+ queue_realm_cap_snaps(child);
+
+ dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
+}
/*
* Parse and apply a snapblob "snap trace" from the MDS. This specifies
@@ -556,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm;
int invalidate = 0;
int err = -ENOMEM;
+ LIST_HEAD(dirty_realms);
dout("update_snap_trace deletion=%d\n", deletion);
more:
@@ -578,45 +628,6 @@ more:
}
}
- if (le64_to_cpu(ri->seq) > realm->seq) {
- dout("update_snap_trace updating %llx %p %lld -> %lld\n",
- realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
- /*
- * if the realm seq has changed, queue a cap_snap for every
- * inode with open caps. we do this _before_ we update
- * the realm info so that we prepare for writeback under the
- * _previous_ snap context.
- *
- * ...unless it's a snap deletion!
- */
- if (!deletion) {
- struct ceph_inode_info *ci;
- struct inode *lastinode = NULL;
-
- spin_lock(&realm->inodes_with_caps_lock);
- list_for_each_entry(ci, &realm->inodes_with_caps,
- i_snap_realm_item) {
- struct inode *inode = igrab(&ci->vfs_inode);
- if (!inode)
- continue;
- spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
- lastinode = inode;
- ceph_queue_cap_snap(ci);
- spin_lock(&realm->inodes_with_caps_lock);
- }
- spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
- dout("update_snap_trace cap_snaps queued\n");
- }
-
- } else {
- dout("update_snap_trace %llx %p seq %lld unchanged\n",
- realm->ino, realm, realm->seq);
- }
-
/* ensure the parent is correct */
err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
if (err < 0)
@@ -624,6 +635,8 @@ more:
invalidate += err;
if (le64_to_cpu(ri->seq) > realm->seq) {
+ dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+ realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
/* update realm parameters, snap lists */
realm->seq = le64_to_cpu(ri->seq);
realm->created = le64_to_cpu(ri->created);
@@ -641,9 +654,17 @@ more:
if (err < 0)
goto fail;
+ /* queue realm for cap_snap creation */
+ list_add(&realm->dirty_item, &dirty_realms);
+
invalidate = 1;
} else if (!realm->cached_context) {
+ dout("update_snap_trace %llx %p seq %lld new\n",
+ realm->ino, realm, realm->seq);
invalidate = 1;
+ } else {
+ dout("update_snap_trace %llx %p seq %lld unchanged\n",
+ realm->ino, realm, realm->seq);
}
dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -656,6 +677,14 @@ more:
if (invalidate)
rebuild_snap_realms(realm);
+ /*
+ * queue cap snaps _after_ we've built the new snap contexts,
+ * so that i_head_snapc can be set appropriately.
+ */
+ list_for_each_entry(realm, &dirty_realms, dirty_item) {
+ queue_realm_cap_snaps(realm);
+ }
+
__cleanup_empty_realms(mdsc);
return 0;
@@ -688,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
igrab(inode);
spin_unlock(&mdsc->snap_flush_lock);
spin_lock(&inode->i_lock);
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 0);
spin_unlock(&inode->i_lock);
iput(inode);
spin_lock(&mdsc->snap_flush_lock);
@@ -789,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
};
struct inode *inode = ceph_find_inode(sb, vino);
struct ceph_inode_info *ci;
+ struct ceph_snap_realm *oldrealm;
if (!inode)
continue;
@@ -814,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
dout(" will move %p to split realm %llx %p\n",
inode, realm->ino, realm);
/*
- * Remove the inode from the realm's inode
- * list, but don't add it to the new realm
- * yet. We don't want the cap_snap to be
- * queued (again) by ceph_update_snap_trace()
- * below. Queue it _now_, under the old context.
+ * Move the inode to the new realm
*/
spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
+ list_add(&ci->i_snap_realm_item,
+ &realm->inodes_with_caps);
+ oldrealm = ci->i_snap_realm;
+ ci->i_snap_realm = realm;
spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&inode->i_lock);
- ceph_queue_cap_snap(ci);
+ ceph_get_snap_realm(mdsc, realm);
+ ceph_put_snap_realm(mdsc, oldrealm);
iput(inode);
continue;
@@ -853,43 +884,9 @@ skip_inode:
ceph_update_snap_trace(mdsc, p, e,
op == CEPH_SNAP_OP_DESTROY);
- if (op == CEPH_SNAP_OP_SPLIT) {
- /*
- * ok, _now_ add the inodes into the new realm.
- */
- for (i = 0; i < num_split_inos; i++) {
- struct ceph_vino vino = {
- .ino = le64_to_cpu(split_inos[i]),
- .snap = CEPH_NOSNAP,
- };
- struct inode *inode = ceph_find_inode(sb, vino);
- struct ceph_inode_info *ci;
-
- if (!inode)
- continue;
- ci = ceph_inode(inode);
- spin_lock(&inode->i_lock);
- if (list_empty(&ci->i_snap_realm_item)) {
- struct ceph_snap_realm *oldrealm =
- ci->i_snap_realm;
-
- dout(" moving %p to split realm %llx %p\n",
- inode, realm->ino, realm);
- spin_lock(&realm->inodes_with_caps_lock);
- list_add(&ci->i_snap_realm_item,
- &realm->inodes_with_caps);
- ci->i_snap_realm = realm;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_get_snap_realm(mdsc, realm);
- ceph_put_snap_realm(mdsc, oldrealm);
- }
- spin_unlock(&inode->i_lock);
- iput(inode);
- }
-
+ if (op == CEPH_SNAP_OP_SPLIT)
/* we took a reference when we created the realm, above */
ceph_put_snap_realm(mdsc, realm);
- }
__cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2482d696f0d..b87638e84c4 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -216,8 +216,7 @@ struct ceph_cap_snap {
uid_t uid;
gid_t gid;
- void *xattr_blob;
- int xattr_len;
+ struct ceph_buffer *xattr_blob;
u64 xattr_version;
u64 size;
@@ -229,8 +228,11 @@ struct ceph_cap_snap {
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
{
- if (atomic_dec_and_test(&capsnap->nref))
+ if (atomic_dec_and_test(&capsnap->nref)) {
+ if (capsnap->xattr_blob)
+ ceph_buffer_put(capsnap->xattr_blob);
kfree(capsnap);
+ }
}
/*
@@ -342,7 +344,8 @@ struct ceph_inode_info {
unsigned i_cap_exporting_issued;
struct ceph_cap_reservation i_cap_migration_resv;
struct list_head i_cap_snaps; /* snapped state pending flush to mds */
- struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */
+ struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
+ dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */
int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
@@ -687,6 +690,8 @@ struct ceph_snap_realm {
struct list_head empty_item; /* if i have ref==0 */
+ struct list_head dirty_item; /* if realm needs new context */
+
/* the current set of snaps for this realm */
struct ceph_snap_context *cached_context;
@@ -823,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
- struct ceph_mds_session **psession);
+ struct ceph_mds_session **psession,
+ int again);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 097a2654c00..9578af610b7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
ci->i_xattrs.prealloc_blob = NULL;
ci->i_xattrs.dirty = false;
+ ci->i_xattrs.version++;
}
}