aboutsummaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-24 07:37:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-24 07:37:52 -0700
commit6e188240ebc2a132d70924942d7c8b9acb46e11a (patch)
tree7628df39f9c1d60a639504faaf6b5941b2c4b4ae /fs/ceph
parent62a11ae3405b6da2535d28e5facc2de5af4a7e62 (diff)
parent240ed68eb567d80dd6bab739341999a5ab0ad55d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (59 commits) ceph: reuse mon subscribe message instead of allocated anew ceph: avoid resending queued message to monitor ceph: Storage class should be before const qualifier ceph: all allocation functions should get gfp_mask ceph: specify max_bytes on readdir replies ceph: cleanup pool op strings ceph: Use kzalloc ceph: use common helper for aborted dir request invalidation ceph: cope with out of order (unsafe after safe) mds reply ceph: save peer feature bits in connection structure ceph: resync headers with userland ceph: use ceph. prefix for virtual xattrs ceph: throw out dirty caps metadata, data on session teardown ceph: attempt mds reconnect if mds closes our session ceph: clean up send_mds_reconnect interface ceph: wait for mds OPEN reply to indicate reconnect success ceph: only send cap releases when mds is OPEN|HUNG ceph: dicard cap releases on mds restart ceph: make mon client statfs handling more generic ceph: drop src address(es) from message header [new protocol feature] ...
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/auth.c9
-rw-r--r--fs/ceph/auth.h2
-rw-r--r--fs/ceph/auth_none.c1
-rw-r--r--fs/ceph/auth_x.c19
-rw-r--r--fs/ceph/caps.c24
-rw-r--r--fs/ceph/ceph_fs.h62
-rw-r--r--fs/ceph/ceph_strings.c16
-rw-r--r--fs/ceph/debugfs.c13
-rw-r--r--fs/ceph/dir.c45
-rw-r--r--fs/ceph/export.c14
-rw-r--r--fs/ceph/file.c16
-rw-r--r--fs/ceph/inode.c97
-rw-r--r--fs/ceph/ioctl.c2
-rw-r--r--fs/ceph/mds_client.c385
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/messenger.c91
-rw-r--r--fs/ceph/messenger.h10
-rw-r--r--fs/ceph/mon_client.c257
-rw-r--r--fs/ceph/mon_client.h27
-rw-r--r--fs/ceph/msgpool.c180
-rw-r--r--fs/ceph/msgpool.h12
-rw-r--r--fs/ceph/msgr.h21
-rw-r--r--fs/ceph/osd_client.c98
-rw-r--r--fs/ceph/pagelist.c2
-rw-r--r--fs/ceph/rados.h23
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c125
-rw-r--r--fs/ceph/super.h30
-rw-r--r--fs/ceph/xattr.c35
30 files changed, 876 insertions, 759 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a9005d862ed..d9c60b84949 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
int rc = 0;
struct page **pages;
- struct pagevec pvec;
loff_t offset;
u64 len;
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
if (rc < 0)
goto out;
- /* set uptodate and add to lru in pagevec-sized chunks */
- pagevec_init(&pvec, 0);
for (; !list_empty(page_list) && len > 0;
rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) {
struct page *page =
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
zero_user_segment(page, s, PAGE_CACHE_SIZE);
}
- if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) {
+ if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
page_cache_release(page);
dout("readpages %p add_to_page_cache failed %p\n",
inode, page);
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
- if (pagevec_add(&pvec, page) == 0)
- pagevec_lru_add_file(&pvec); /* add to lru */
+ page_cache_release(page);
}
- pagevec_lru_add_file(&pvec);
rc = 0;
out:
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req,
ceph_release_pages(req->r_pages, req->r_num_pages);
if (req->r_pages_from_pool)
mempool_free(req->r_pages,
- ceph_client(inode->i_sb)->wb_pagevec_pool);
+ ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
else
kfree(req->r_pages);
ceph_osdc_put_request(req);
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 818afe72e6c..9f46de2ba7a 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac,
ret = ac->ops->build_request(ac, p + sizeof(u32), end);
if (ret < 0) {
- pr_err("error %d building request\n", ret);
+ pr_err("error %d building auth method %s request\n", ret,
+ ac->ops->name);
return ret;
}
dout(" built request %d bytes\n", ret);
@@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol);
if (ret) {
- pr_err("error %d on auth protocol %d init\n",
- ret, protocol);
+ pr_err("error %d on auth method %s init\n",
+ ret, ac->ops->name);
goto out;
}
}
@@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
if (ret == -EAGAIN) {
return ceph_build_auth_request(ac, reply_buf, reply_len);
} else if (ret) {
- pr_err("authentication error %d\n", ret);
+ pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
return ret;
}
return 0;
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index ca4f57cfb26..4429a707c02 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -15,6 +15,8 @@ struct ceph_auth_client;
struct ceph_authorizer;
struct ceph_auth_client_ops {
+ const char *name;
+
/*
* true if we are authenticated and can connect to
* services.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 8cd9e3af07f..24407c11929 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
+ .name = "none",
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index fee5a08da88..7b206231566 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
int ret;
char *dbuf;
char *ticket_buf;
- u8 struct_v;
+ u8 reply_struct_v;
dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!dbuf)
@@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
goto out_dbuf;
ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
- struct_v = ceph_decode_8(&p);
- if (struct_v != 1)
+ reply_struct_v = ceph_decode_8(&p);
+ if (reply_struct_v != 1)
goto bad;
num = ceph_decode_32(&p);
dout("%d tickets\n", num);
while (num--) {
int type;
- u8 struct_v;
+ u8 tkt_struct_v, blob_struct_v;
struct ceph_x_ticket_handler *th;
void *dp, *dend;
int dlen;
@@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
type = ceph_decode_32(&p);
dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
- struct_v = ceph_decode_8(&p);
- if (struct_v != 1)
+ tkt_struct_v = ceph_decode_8(&p);
+ if (tkt_struct_v != 1)
goto bad;
th = get_ticket_handler(ac, type);
@@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
dend = dbuf + dlen;
dp = dbuf;
- struct_v = ceph_decode_8(&dp);
- if (struct_v != 1)
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
goto bad;
memcpy(&old_key, &th->session_key, sizeof(old_key));
@@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
tpend = tp + dlen;
dout(" ticket blob is %d bytes\n", dlen);
ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
- struct_v = ceph_decode_8(&tp);
+ blob_struct_v = ceph_decode_8(&tp);
new_secret_id = ceph_decode_64(&tp);
ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
if (ret)
@@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
static const struct ceph_auth_client_ops ceph_x_ops = {
+ .name = "x",
.is_authenticated = ceph_x_is_authenticated,
.build_request = ceph_x_build_request,
.handle_reply = ceph_x_handle_reply,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d9400534b27..0dd0b81e64f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL);
- if (IS_ERR(msg))
- return PTR_ERR(msg);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
+ if (!msg)
+ return -ENOMEM;
msg->hdr.tid = cpu_to_le64(flush_tid);
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
*/
void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct inode *inode = &ci->vfs_inode;
int was = ci->i_dirty_caps;
int dirty = 0;
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
static int __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int flushing;
@@ -1663,7 +1665,7 @@ ack:
static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
unsigned *flush_tid)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int unlock_session = session ? 0 : 1;
int flushing = 0;
@@ -1716,10 +1718,9 @@ out_unlocked:
static int caps_are_flushed(struct inode *inode, unsigned tid)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- int dirty, i, ret = 1;
+ int i, ret = 1;
spin_lock(&inode->i_lock);
- dirty = __ceph_caps_dirty(ci);
for (i = 0; i < CEPH_CAP_BITS; i++)
if ((ci->i_flushing_caps & (1 << i)) &&
ci->i_cap_flush_tid[i] <= tid) {
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
err = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
} else {
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(inode->i_sb)->mdsc;
spin_lock(&inode->i_lock);
if (__ceph_caps_dirty(ci))
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
__releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
unsigned seq = le32_to_cpu(m->seq);
int dirty = le32_to_cpu(m->dirty);
int cleaned = 0;
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 0c2241ef365..3b9eeed097b 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -19,7 +19,7 @@
* Ceph release version
*/
#define CEPH_VERSION_MAJOR 0
-#define CEPH_VERSION_MINOR 19
+#define CEPH_VERSION_MINOR 20
#define CEPH_VERSION_PATCH 0
#define _CEPH_STRINGIFY(x) #x
@@ -36,7 +36,7 @@
* client-facing protocol.
*/
#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
-#define CEPH_MDS_PROTOCOL 9 /* cluster internal */
+#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
#define CEPH_MON_PROTOCOL 5 /* cluster internal */
#define CEPH_OSDC_PROTOCOL 24 /* server/client */
#define CEPH_MDSC_PROTOCOL 32 /* server/client */
@@ -53,8 +53,18 @@
/*
* feature bits
*/
-#define CEPH_FEATURE_SUPPORTED 0
-#define CEPH_FEATURE_REQUIRED 0
+#define CEPH_FEATURE_UID 1
+#define CEPH_FEATURE_NOSRCADDR 2
+#define CEPH_FEATURE_FLOCK 4
+
+#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK
+#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR
/*
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
+#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+
/*********************************************
* message layer
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
#define CEPH_MSG_CLIENT_SNAP 0x312
#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
+/* pool ops */
+#define CEPH_MSG_POOLOP_REPLY 48
+#define CEPH_MSG_POOLOP 49
+
+
/* osd */
#define CEPH_MSG_OSD_MAP 41
#define CEPH_MSG_OSD_OP 42
#define CEPH_MSG_OSD_OPREPLY 43
+/* pool operations */
+enum {
+ POOL_OP_CREATE = 0x01,
+ POOL_OP_DELETE = 0x02,
+ POOL_OP_AUID_CHANGE = 0x03,
+ POOL_OP_CREATE_SNAP = 0x11,
+ POOL_OP_DELETE_SNAP = 0x12,
+ POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
+ POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
+};
+
struct ceph_mon_request_header {
__le64 have_version;
__le16 session_mon;
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply {
struct ceph_statfs st;
} __attribute__ ((packed));
+const char *ceph_pool_op_name(int op);
+
+struct ceph_mon_poolop {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 pool;
+ __le32 op;
+ __le64 auid;
+ __le64 snapid;
+ __le32 name_len;
+} __attribute__ ((packed));
+
+struct ceph_mon_poolop_reply {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 reply_code;
+ __le32 epoch;
+ char has_data;
+ char data[0];
+} __attribute__ ((packed));
+
+struct ceph_mon_unmanaged_snap {
+ __le64 snapid;
+} __attribute__ ((packed));
+
struct ceph_osd_getmap {
struct ceph_mon_request_header monhdr;
struct ceph_fsid fsid;
@@ -308,6 +361,7 @@ union ceph_mds_request_args {
struct {
__le32 frag; /* which dir fragment */
__le32 max_entries; /* how many dentries to grab */
+ __le32 max_bytes;
} __attribute__ ((packed)) readdir;
struct {
__le32 mode;
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c
index 8e4be6a80c6..7503aee828c 100644
--- a/fs/ceph/ceph_strings.c
+++ b/fs/ceph/ceph_strings.c
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type)
case CEPH_ENTITY_TYPE_OSD: return "osd";
case CEPH_ENTITY_TYPE_MON: return "mon";
case CEPH_ENTITY_TYPE_CLIENT: return "client";
- case CEPH_ENTITY_TYPE_ADMIN: return "admin";
case CEPH_ENTITY_TYPE_AUTH: return "auth";
default: return "unknown";
}
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op)
case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+ case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
case CEPH_OSD_OP_PULL: return "pull";
case CEPH_OSD_OP_PUSH: return "push";
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o)
}
return "???";
}
+
+const char *ceph_pool_op_name(int op)
+{
+ switch (op) {
+ case POOL_OP_CREATE: return "create";
+ case POOL_OP_DELETE: return "delete";
+ case POOL_OP_AUID_CHANGE: return "auid change";
+ case POOL_OP_CREATE_SNAP: return "create snap";
+ case POOL_OP_DELETE_SNAP: return "delete snap";
+ case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+ case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+ }
+ return "???";
+}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f7048da92ac..3be33fb066c 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p)
static int monc_show(struct seq_file *s, void *p)
{
struct ceph_client *client = s->private;
- struct ceph_mon_statfs_request *req;
+ struct ceph_mon_generic_request *req;
struct ceph_mon_client *monc = &client->monc;
struct rb_node *rp;
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p)
if (monc->want_next_osdmap)
seq_printf(s, "want next osdmap\n");
- for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) {
- req = rb_entry(rp, struct ceph_mon_statfs_request, node);
- seq_printf(s, "%lld statfs\n", req->tid);
+ for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+ __u16 op;
+ req = rb_entry(rp, struct ceph_mon_generic_request, node);
+ op = le16_to_cpu(req->request->hdr.type);
+ if (op == CEPH_MSG_STATFS)
+ seq_printf(s, "%lld statfs\n", req->tid);
+ else
+ seq_printf(s, "%lld unknown\n", req->tid);
}
mutex_unlock(&monc->mutex);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 650d2db5ed2..4fd30900eff 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry)
return -ENOMEM; /* oh well */
spin_lock(&dentry->d_lock);
- if (dentry->d_fsdata) /* lost a race */
+ if (dentry->d_fsdata) {
+ /* lost a race */
+ kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock;
+ }
di->dentry = dentry;
di->lease_session = NULL;
dentry->d_fsdata = di;
@@ -125,7 +128,8 @@ more:
dentry = list_entry(p, struct dentry, d_u.d_child);
di = ceph_dentry(dentry);
while (1) {
- dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next,
+ dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
+ d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) {
fi->at_end = 1;
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
u32 ftype;
struct ceph_mds_reply_info_parsed *rinfo;
const int max_entries = client->mount_args->max_readdir;
+ const int max_bytes = client->mount_args->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
if (fi->at_end)
@@ -312,6 +317,7 @@ more:
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
+ req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
req->r_num_caps = max_entries + 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
@@ -335,7 +341,7 @@ more:
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
fi->last_name = NULL;
- fi->next_offset = 0;
+ fi->next_offset = 2;
} else {
rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
- struct ceph_client *client = ceph_client(dentry->d_sb);
+ struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = dentry->d_parent->d_inode;
/* .snap dir? */
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
!is_root_ceph_dentry(dir, dentry) &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
- di->offset = ci->i_max_offset++;
spin_unlock(&dir->i_lock);
dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL);
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
/* ensure target dentry is invalidated, despite
rehashing bug in vfs_rename_dir */
- new_dentry->d_time = jiffies;
- ceph_dentry(new_dentry)->lease_shared_gen = 0;
+ ceph_invalidate_dentry_lease(new_dentry);
}
ceph_mdsc_put_request(req);
return err;
}
+/*
+ * Ensure a dentry lease will no longer revalidate.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_time = jiffies;
+ ceph_dentry(dentry)->lease_shared_gen = 0;
+ spin_unlock(&dentry->d_lock);
+}
/*
* Check if dentry lease is valid. If not, delete the lease. Try to
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir = dentry->d_parent->d_inode;
- dout("d_revalidate %p '%.*s' inode %p\n", dentry,
- dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+ ceph_dentry(dentry)->offset);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
- if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT))
+ if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR;
if (!cf->dir_info) {
@@ -1152,7 +1167,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
dn->d_name.len, dn->d_name.name);
if (di) {
- mdsc = &ceph_client(dn->d_sb)->mdsc;
+ mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_add_tail(&di->lru, &mdsc->dentry_lru);
mdsc->num_dentry++;
@@ -1165,10 +1180,10 @@ void ceph_dentry_lru_touch(struct dentry *dn)
struct ceph_dentry_info *di = ceph_dentry(dn);
struct ceph_mds_client *mdsc;
- dout("dentry_lru_touch %p %p '%.*s'\n", di, dn,
- dn->d_name.len, dn->d_name.name);
+ dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
+ dn->d_name.len, dn->d_name.name, di->offset);
if (di) {
- mdsc = &ceph_client(dn->d_sb)->mdsc;
+ mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_move_tail(&di->lru, &mdsc->dentry_lru);
spin_unlock(&mdsc->dentry_lru_lock);
@@ -1183,7 +1198,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
dn->d_name.len, dn->d_name.name);
if (di) {
- mdsc = &ceph_client(dn->d_sb)->mdsc;
+ mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_del_init(&di->lru);
mdsc->num_dentry--;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9d67572fb32..17447644d67 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
return ERR_PTR(-ESTALE);
dentry = d_obtain_alias(inode);
- if (!dentry) {
+ if (IS_ERR(dentry)) {
pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
fh->ino, inode);
iput(inode);
- return ERR_PTR(-ENOMEM);
+ return dentry;
}
err = ceph_init_dentry(dentry);
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
static struct dentry *__cfh_to_dentry(struct super_block *sb,
struct ceph_nfs_confh *cfh)
{
- struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
struct dentry *dentry;
struct ceph_vino vino;
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
}
dentry = d_obtain_alias(inode);
- if (!dentry) {
+ if (IS_ERR(dentry)) {
pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
cfh->ino, inode);
iput(inode);
- return ERR_PTR(-ENOMEM);
+ return dentry;
}
err = ceph_init_dentry(dentry);
if (err < 0) {
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
return ERR_PTR(-ESTALE);
dentry = d_obtain_alias(inode);
- if (!dentry) {
+ if (IS_ERR(dentry)) {
pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
cfh->ino, inode);
iput(inode);
- return ERR_PTR(-ENOMEM);
+ return dentry;
}
err = ceph_init_dentry(dentry);
if (err < 0) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d634938edc..6512b6701b9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages)
/*
* allocate a vector new pages
*/
-static struct page **alloc_page_vector(int num_pages)
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
{
struct page **pages;
int i;
- pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+ pages = kmalloc(sizeof(*pages) * num_pages, flags);
if (!pages)
return ERR_PTR(-ENOMEM);
for (i = 0; i < num_pages; i++) {
- pages[i] = alloc_page(GFP_NOFS);
+ pages[i] = __page_cache_alloc(flags);
if (pages[i] == NULL) {
ceph_release_page_vector(pages, i);
return ERR_PTR(-ENOMEM);
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
* in sequence.
*/
} else {
- pages = alloc_page_vector(num_pages);
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
}
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -649,8 +649,8 @@ more:
do_sync,
ci->i_truncate_seq, ci->i_truncate_size,
&mtime, false, 2);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ if (!req)
+ return -ENOMEM;
num_pages = calc_pages_for(pos, len);
@@ -668,7 +668,7 @@ more:
truncate_inode_pages_range(inode->i_mapping, pos,
(pos+len) | (PAGE_CACHE_SIZE-1));
} else {
- pages = alloc_page_vector(num_pages);
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc;
+ struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
loff_t endoff = pos + iov->iov_len;
int got = 0;
int ret, err;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 85b4d2ffdeb..a81b8b662c7 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode)
*/
if (ci->i_snap_realm) {
struct ceph_mds_client *mdsc =
- &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct ceph_snap_realm *realm = ci->i_snap_realm;
dout(" dropping residual ref to snap realm %p\n", realm);
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode,
memcpy(ci->i_xattrs.blob->vec.iov_base,
iinfo->xattr_data, iinfo->xattr_len);
ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
+ xattr_blob = NULL;
}
inode->i_mapping->a_ops = &ceph_aops;
inode->i_mapping->backing_dev_info =
- &ceph_client(inode->i_sb)->backing_dev_info;
+ &ceph_sb_to_client(inode->i_sb)->backing_dev_info;
switch (inode->i_mode & S_IFMT) {
case S_IFIFO:
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode,
/* set dir completion flag? */
if (ci->i_files == 0 && ci->i_subdirs == 0 &&
ceph_snap(inode) == CEPH_NOSNAP &&
- (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) {
+ (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+ (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
ci->i_ceph_flags |= CEPH_I_COMPLETE;
ci->i_max_offset = 2;
}
/* it may be better to set st_size in getattr instead? */
- if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES))
+ if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
inode->i_size = ci->i_rbytes;
break;
default:
@@ -802,6 +