aboutsummaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/auth.c8
-rw-r--r--fs/ceph/auth.h8
-rw-r--r--fs/ceph/auth_none.c9
-rw-r--r--fs/ceph/auth_x.c31
-rw-r--r--fs/ceph/caps.c121
-rw-r--r--fs/ceph/ceph_fs.h83
-rw-r--r--fs/ceph/ceph_strings.c16
-rw-r--r--fs/ceph/debugfs.c13
-rw-r--r--fs/ceph/dir.c52
-rw-r--r--fs/ceph/export.c16
-rw-r--r--fs/ceph/file.c21
-rw-r--r--fs/ceph/inode.c99
-rw-r--r--fs/ceph/ioctl.c2
-rw-r--r--fs/ceph/mds_client.c428
-rw-r--r--fs/ceph/mds_client.h12
-rw-r--r--fs/ceph/messenger.c97
-rw-r--r--fs/ceph/messenger.h11
-rw-r--r--fs/ceph/mon_client.c264
-rw-r--r--fs/ceph/mon_client.h27
-rw-r--r--fs/ceph/msgpool.c180
-rw-r--r--fs/ceph/msgpool.h12
-rw-r--r--fs/ceph/msgr.h21
-rw-r--r--fs/ceph/osd_client.c105
-rw-r--r--fs/ceph/osdmap.c2
-rw-r--r--fs/ceph/pagelist.c2
-rw-r--r--fs/ceph/rados.h23
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c142
-rw-r--r--fs/ceph/super.h33
-rw-r--r--fs/ceph/xattr.c35
31 files changed, 1043 insertions, 843 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a9005d862ed..d9c60b84949 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
int rc = 0;
struct page **pages;
- struct pagevec pvec;
loff_t offset;
u64 len;
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
if (rc < 0)
goto out;
- /* set uptodate and add to lru in pagevec-sized chunks */
- pagevec_init(&pvec, 0);
for (; !list_empty(page_list) && len > 0;
rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) {
struct page *page =
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
zero_user_segment(page, s, PAGE_CACHE_SIZE);
}
- if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) {
+ if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
page_cache_release(page);
dout("readpages %p add_to_page_cache failed %p\n",
inode, page);
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
- if (pagevec_add(&pvec, page) == 0)
- pagevec_lru_add_file(&pvec); /* add to lru */
+ page_cache_release(page);
}
- pagevec_lru_add_file(&pvec);
rc = 0;
out:
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req,
ceph_release_pages(req->r_pages, req->r_num_pages);
if (req->r_pages_from_pool)
mempool_free(req->r_pages,
- ceph_client(inode->i_sb)->wb_pagevec_pool);
+ ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
else
kfree(req->r_pages);
ceph_osdc_put_request(req);
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 818afe72e6c..89490beaf53 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,7 +1,6 @@
#include "ceph_debug.h"
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -150,7 +149,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac,
ret = ac->ops->build_request(ac, p + sizeof(u32), end);
if (ret < 0) {
- pr_err("error %d building request\n", ret);
+ pr_err("error %d building auth method %s request\n", ret,
+ ac->ops->name);
return ret;
}
dout(" built request %d bytes\n", ret);
@@ -229,7 +229,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
if (ret == -EAGAIN) {
return ceph_build_auth_request(ac, reply_buf, reply_len);
} else if (ret) {
- pr_err("authentication error %d\n", ret);
+ pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
return ret;
}
return 0;
@@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
if (!ac->protocol)
return ceph_auth_build_hello(ac, msg_buf, msg_len);
BUG_ON(!ac->ops);
- if (!ac->ops->is_authenticated(ac))
+ if (ac->ops->should_authenticate(ac))
return ceph_build_auth_request(ac, msg_buf, msg_len);
return 0;
}
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index ca4f57cfb26..d38a2fb4a13 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -15,6 +15,8 @@ struct ceph_auth_client;
struct ceph_authorizer;
struct ceph_auth_client_ops {
+ const char *name;
+
/*
* true if we are authenticated and can connect to
* services.
@@ -22,6 +24,12 @@ struct ceph_auth_client_ops {
int (*is_authenticated)(struct ceph_auth_client *ac);
/*
+ * true if we should (re)authenticate, e.g., when our tickets
+ * are getting old and crusty.
+ */
+ int (*should_authenticate)(struct ceph_auth_client *ac);
+
+ /*
* build requests and process replies during monitor
* handshake. if handle_reply returns -EAGAIN, we build
* another request.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 8cd9e3af07f..ad1dc21286c 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac)
return !xi->starting;
}
+static int should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return xi->starting;
+}
+
/*
* the generic auth code decode the global_id, and we carry no actual
* authenticate state, so nothing happens here.
@@ -94,9 +101,11 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
+ .name = "none",
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,
+ .should_authenticate = should_authenticate,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index fee5a08da88..83d4d2785ff 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
return (ac->want_keys & xi->have_keys) == ac->want_keys;
}
+static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return need != 0;
+}
+
static int ceph_x_encrypt_buflen(int ilen)
{
return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
@@ -127,7 +138,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
int ret;
char *dbuf;
char *ticket_buf;
- u8 struct_v;
+ u8 reply_struct_v;
dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!dbuf)
@@ -139,14 +150,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
goto out_dbuf;
ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
- struct_v = ceph_decode_8(&p);
- if (struct_v != 1)
+ reply_struct_v = ceph_decode_8(&p);
+ if (reply_struct_v != 1)
goto bad;
num = ceph_decode_32(&p);
dout("%d tickets\n", num);
while (num--) {
int type;
- u8 struct_v;
+ u8 tkt_struct_v, blob_struct_v;
struct ceph_x_ticket_handler *th;
void *dp, *dend;
int dlen;
@@ -165,8 +176,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
type = ceph_decode_32(&p);
dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
- struct_v = ceph_decode_8(&p);
- if (struct_v != 1)
+ tkt_struct_v = ceph_decode_8(&p);
+ if (tkt_struct_v != 1)
goto bad;
th = get_ticket_handler(ac, type);
@@ -186,8 +197,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
dend = dbuf + dlen;
dp = dbuf;
- struct_v = ceph_decode_8(&dp);
- if (struct_v != 1)
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
goto bad;
memcpy(&old_key, &th->session_key, sizeof(old_key));
@@ -224,7 +235,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
tpend = tp + dlen;
dout(" ticket blob is %d bytes\n", dlen);
ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
- struct_v = ceph_decode_8(&tp);
+ blob_struct_v = ceph_decode_8(&tp);
new_secret_id = ceph_decode_64(&tp);
ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
if (ret)
@@ -618,7 +629,9 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
static const struct ceph_auth_client_ops ceph_x_ops = {
+ .name = "x",
.is_authenticated = ceph_x_is_authenticated,
+ .should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request,
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d9400534b27..619b61655ee 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL);
- if (IS_ERR(msg))
- return PTR_ERR(msg);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
+ if (!msg)
+ return -ENOMEM;
msg->hdr.tid = cpu_to_le64(flush_tid);
@@ -980,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -993,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -1298,7 +1307,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
*/
void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct inode *inode = &ci->vfs_inode;
int was = ci->i_dirty_caps;
int dirty = 0;
@@ -1336,7 +1346,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
static int __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int flushing;
@@ -1663,7 +1673,7 @@ ack:
static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
unsigned *flush_tid)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int unlock_session = session ? 0 : 1;
int flushing = 0;
@@ -1716,10 +1726,9 @@ out_unlocked:
static int caps_are_flushed(struct inode *inode, unsigned tid)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- int dirty, i, ret = 1;
+ int i, ret = 1;
spin_lock(&inode->i_lock);
- dirty = __ceph_caps_dirty(ci);
for (i = 0; i < CEPH_CAP_BITS; i++)
if ((ci->i_flushing_caps & (1 << i)) &&
ci->i_cap_flush_tid[i] <= tid) {
@@ -1775,9 +1784,9 @@ out:
spin_unlock(&ci->i_unsafe_lock);
}
-int ceph_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ceph_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned flush_tid;
int ret;
@@ -1829,7 +1838,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
err = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
} else {
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(inode->i_sb)->mdsc;
spin_lock(&inode->i_lock);
if (__ceph_caps_dirty(ci))
@@ -2411,7 +2421,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
__releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
unsigned seq = le32_to_cpu(m->seq);
int dirty = le32_to_cpu(m->dirty);
int cleaned = 0;
@@ -2653,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2673,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2687,6 +2698,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2712,7 +2735,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 0c2241ef365..2fa992eaf7d 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -19,7 +19,7 @@
* Ceph release version
*/
#define CEPH_VERSION_MAJOR 0
-#define CEPH_VERSION_MINOR 19
+#define CEPH_VERSION_MINOR 20
#define CEPH_VERSION_PATCH 0
#define _CEPH_STRINGIFY(x) #x
@@ -36,7 +36,7 @@
* client-facing protocol.
*/
#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
-#define CEPH_MDS_PROTOCOL 9 /* cluster internal */
+#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
#define CEPH_MON_PROTOCOL 5 /* cluster internal */
#define CEPH_OSDC_PROTOCOL 24 /* server/client */
#define CEPH_MDSC_PROTOCOL 32 /* server/client */
@@ -53,8 +53,18 @@
/*
* feature bits
*/
-#define CEPH_FEATURE_SUPPORTED 0
-#define CEPH_FEATURE_REQUIRED 0
+#define CEPH_FEATURE_UID 1
+#define CEPH_FEATURE_NOSRCADDR 2
+#define CEPH_FEATURE_FLOCK 4
+
+#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK
+#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR
/*
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
+#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+
/*********************************************
* message layer
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
#define CEPH_MSG_CLIENT_SNAP 0x312
#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
+/* pool ops */
+#define CEPH_MSG_POOLOP_REPLY 48
+#define CEPH_MSG_POOLOP 49
+
+
/* osd */
#define CEPH_MSG_OSD_MAP 41
#define CEPH_MSG_OSD_OP 42
#define CEPH_MSG_OSD_OPREPLY 43
+/* pool operations */
+enum {
+ POOL_OP_CREATE = 0x01,
+ POOL_OP_DELETE = 0x02,
+ POOL_OP_AUID_CHANGE = 0x03,
+ POOL_OP_CREATE_SNAP = 0x11,
+ POOL_OP_DELETE_SNAP = 0x12,
+ POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
+ POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
+};
+
struct ceph_mon_request_header {
__le64 have_version;
__le16 session_mon;
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply {
struct ceph_statfs st;
} __attribute__ ((packed));
+const char *ceph_pool_op_name(int op);
+
+struct ceph_mon_poolop {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 pool;
+ __le32 op;
+ __le64 auid;
+ __le64 snapid;
+ __le32 name_len;
+} __attribute__ ((packed));
+
+struct ceph_mon_poolop_reply {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 reply_code;
+ __le32 epoch;
+ char has_data;
+ char data[0];
+} __attribute__ ((packed));
+
+struct ceph_mon_unmanaged_snap {
+ __le64 snapid;
+} __attribute__ ((packed));
+
struct ceph_osd_getmap {
struct ceph_mon_request_header monhdr;
struct ceph_fsid fsid;
@@ -212,16 +265,17 @@ extern const char *ceph_mds_state_name(int s);
* - they also define the lock ordering by the MDS
* - a few of these are internal to the mds
*/
-#define CEPH_LOCK_DN 1
-#define CEPH_LOCK_ISNAP 2
-#define CEPH_LOCK_IVERSION 4 /* mds internal */
-#define CEPH_LOCK_IFILE 8 /* mds internal */
-#define CEPH_LOCK_IAUTH 32
-#define CEPH_LOCK_ILINK 64
-#define CEPH_LOCK_IDFT 128 /* dir frag tree */
-#define CEPH_LOCK_INEST 256 /* mds internal */
-#define CEPH_LOCK_IXATTR 512
-#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */
+#define CEPH_LOCK_DVERSION 1
+#define CEPH_LOCK_DN 2
+#define CEPH_LOCK_ISNAP 16
+#define CEPH_LOCK_IVERSION 32 /* mds internal */
+#define CEPH_LOCK_IFILE 64
+#define CEPH_LOCK_IAUTH 128
+#define CEPH_LOCK_ILINK 256
+#define CEPH_LOCK_IDFT 512 /* dir frag tree */
+#define CEPH_LOCK_INEST 1024 /* mds internal */
+#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
/* client_session ops */
enum {
@@ -308,6 +362,7 @@ union ceph_mds_request_args {
struct {
__le32 frag; /* which dir fragment */
__le32 max_entries; /* how many dentries to grab */
+ __le32 max_bytes;
} __attribute__ ((packed)) readdir;
struct {
__le32 mode;
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c
index 8e4be6a80c6..7503aee828c 100644
--- a/fs/ceph/ceph_strings.c
+++ b/fs/ceph/ceph_strings.c
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type)
case CEPH_ENTITY_TYPE_OSD: return "osd";
case CEPH_ENTITY_TYPE_MON: return "mon";
case CEPH_ENTITY_TYPE_CLIENT: return "client";
- case CEPH_ENTITY_TYPE_ADMIN: return "admin";
case CEPH_ENTITY_TYPE_AUTH: return "auth";
default: return "unknown";
}
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op)
case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+ case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
case CEPH_OSD_OP_PULL: return "pull";
case CEPH_OSD_OP_PUSH: return "push";
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o)
}
return "???";
}
+
+const char *ceph_pool_op_name(int op)
+{
+ switch (op) {
+ case POOL_OP_CREATE: return "create";
+ case POOL_OP_DELETE: return "delete";
+ case POOL_OP_AUID_CHANGE: return "auid change";
+ case POOL_OP_CREATE_SNAP: return "create snap";
+ case POOL_OP_DELETE_SNAP: return "delete snap";
+ case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+ case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+ }
+ return "???";
+}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f7048da92ac..3be33fb066c 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p)
static int monc_show(struct seq_file *s, void *p)
{
struct ceph_client *client = s->private;
- struct ceph_mon_statfs_request *req;
+ struct ceph_mon_generic_request *req;
struct ceph_mon_client *monc = &client->monc;
struct rb_node *rp;
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p)
if (monc->want_next_osdmap)
seq_printf(s, "want next osdmap\n");
- for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) {
- req = rb_entry(rp, struct ceph_mon_statfs_request, node);
- seq_printf(s, "%lld statfs\n", req->tid);
+ for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+ __u16 op;
+ req = rb_entry(rp, struct ceph_mon_generic_request, node);
+ op = le16_to_cpu(req->request->hdr.type);
+ if (op == CEPH_MSG_STATFS)
+ seq_printf(s, "%lld statfs\n", req->tid);
+ else
+ seq_printf(s, "%lld unknown\n", req->tid);
}
mutex_unlock(&monc->mutex);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 650d2db5ed2..f85719310db 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry)
return -ENOMEM; /* oh well */
spin_lock(&dentry->d_lock);
- if (dentry->d_fsdata) /* lost a race */
+ if (dentry->d_fsdata) {
+ /* lost a race */
+ kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock;
+ }
di->dentry = dentry;
di->lease_session = NULL;
dentry->d_fsdata = di;
@@ -125,7 +128,8 @@ more:
dentry = list_entry(p, struct dentry, d_u.d_child);
di = ceph_dentry(dentry);
while (1) {
- dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next,
+ dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
+ d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) {
fi->at_end = 1;
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
u32 ftype;
struct ceph_mds_reply_info_parsed *rinfo;
const int max_entries = client->mount_args->max_readdir;
+ const int max_bytes = client->mount_args->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
if (fi->at_end)
@@ -312,6 +317,7 @@ more:
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
+ req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
req->r_num_caps = max_entries + 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
@@ -335,7 +341,7 @@ more:
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
fi->last_name = NULL;
- fi->next_offset = 0;
+ fi->next_offset = 2;
} else {
rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
- struct ceph_client *client = ceph_client(dentry->d_sb);
+ struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = dentry->d_parent->d_inode;
/* .snap dir? */
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
!is_root_ceph_dentry(dir, dentry) &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
- di->offset = ci->i_max_offset++;
spin_unlock(&dir->i_lock);
dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL);
@@ -582,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
/* we only need inode linkage */
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
/* ensure target dentry is invalidated, despite
rehashing bug in vfs_rename_dir */
- new_dentry->d_time = jiffies;
- ceph_dentry(new_dentry)->lease_shared_gen = 0;
+ ceph_invalidate_dentry_lease(new_dentry);
}
ceph_mdsc_put_request(req);
return err;
}
+/*
+ * Ensure a dentry lease will no longer revalidate.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_time = jiffies;
+ ceph_dentry(dentry)->lease_shared_gen = 0;
+ spin_unlock(&dentry->d_lock);
+}
/*
* Check if dentry lease is valid. If not, delete the lease. Try to
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir = dentry->d_parent->d_inode;
- dout("d_revalidate %p '%.*s' inode %p\n", dentry,
- dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+ ceph_dentry(dentry)->offset);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
- if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT))
+ if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR;
if (!cf->dir_info) {
@@ -1092,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(st