diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 11 | ||||
-rw-r--r-- | fs/ceph/auth.c | 8 | ||||
-rw-r--r-- | fs/ceph/auth.h | 8 | ||||
-rw-r--r-- | fs/ceph/auth_none.c | 9 | ||||
-rw-r--r-- | fs/ceph/auth_x.c | 31 | ||||
-rw-r--r-- | fs/ceph/caps.c | 121 | ||||
-rw-r--r-- | fs/ceph/ceph_fs.h | 83 | ||||
-rw-r--r-- | fs/ceph/ceph_strings.c | 16 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 13 | ||||
-rw-r--r-- | fs/ceph/dir.c | 52 | ||||
-rw-r--r-- | fs/ceph/export.c | 16 | ||||
-rw-r--r-- | fs/ceph/file.c | 21 | ||||
-rw-r--r-- | fs/ceph/inode.c | 99 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 428 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 12 | ||||
-rw-r--r-- | fs/ceph/messenger.c | 97 | ||||
-rw-r--r-- | fs/ceph/messenger.h | 11 | ||||
-rw-r--r-- | fs/ceph/mon_client.c | 264 | ||||
-rw-r--r-- | fs/ceph/mon_client.h | 27 | ||||
-rw-r--r-- | fs/ceph/msgpool.c | 180 | ||||
-rw-r--r-- | fs/ceph/msgpool.h | 12 | ||||
-rw-r--r-- | fs/ceph/msgr.h | 21 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 105 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 2 | ||||
-rw-r--r-- | fs/ceph/pagelist.c | 2 | ||||
-rw-r--r-- | fs/ceph/rados.h | 23 | ||||
-rw-r--r-- | fs/ceph/snap.c | 2 | ||||
-rw-r--r-- | fs/ceph/super.c | 142 | ||||
-rw-r--r-- | fs/ceph/super.h | 33 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 35 |
31 files changed, 1043 insertions, 843 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a9005d862ed..d9c60b84949 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; int rc = 0; struct page **pages; - struct pagevec pvec; loff_t offset; u64 len; @@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc < 0) goto out; - /* set uptodate and add to lru in pagevec-sized chunks */ - pagevec_init(&pvec, 0); for (; !list_empty(page_list) && len > 0; rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { struct page *page = @@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, zero_user_segment(page, s, PAGE_CACHE_SIZE); } - if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { page_cache_release(page); dout("readpages %p add_to_page_cache failed %p\n", inode, page); @@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); - if (pagevec_add(&pvec, page) == 0) - pagevec_lru_add_file(&pvec); /* add to lru */ + page_cache_release(page); } - pagevec_lru_add_file(&pvec); rc = 0; out: @@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req, ceph_release_pages(req->r_pages, req->r_num_pages); if (req->r_pages_from_pool) mempool_free(req->r_pages, - ceph_client(inode->i_sb)->wb_pagevec_pool); + ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); else kfree(req->r_pages); ceph_osdc_put_request(req); diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 818afe72e6c..89490beaf53 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,6 @@ #include "ceph_debug.h" #include <linux/module.h> -#include <linux/slab.h> #include <linux/err.h> #include <linux/slab.h> @@ -150,7 +149,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac, ret = ac->ops->build_request(ac, p + sizeof(u32), end); if (ret < 0) { - pr_err("error %d building request\n", ret); + pr_err("error %d building auth method %s request\n", ret, + ac->ops->name); return ret; } dout(" built request %d bytes\n", ret); @@ -229,7 +229,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ret == -EAGAIN) { return ceph_build_auth_request(ac, reply_buf, reply_len); } else if (ret) { - pr_err("authentication error %d\n", ret); + pr_err("auth method '%s' error %d\n", ac->ops->name, ret); return ret; } return 0; @@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, if (!ac->protocol) return ceph_auth_build_hello(ac, msg_buf, msg_len); BUG_ON(!ac->ops); - if (!ac->ops->is_authenticated(ac)) + if (ac->ops->should_authenticate(ac)) return ceph_build_auth_request(ac, msg_buf, msg_len); return 0; } diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index ca4f57cfb26..d38a2fb4a13 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -15,6 +15,8 @@ struct ceph_auth_client; struct ceph_authorizer; struct ceph_auth_client_ops { + const char *name; + /* * true if we are authenticated and can connect to * services. @@ -22,6 +24,12 @@ struct ceph_auth_client_ops { int (*is_authenticated)(struct ceph_auth_client *ac); /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + + /* * build requests and process replies during monitor * handshake. if handle_reply returns -EAGAIN, we build * another request. diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f..ad1dc21286c 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return xi->starting; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -94,9 +101,11 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, } static const struct ceph_auth_client_ops ceph_auth_none_ops = { + .name = "none", .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .should_authenticate = should_authenticate, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index fee5a08da88..83d4d2785ff 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return need != 0; +} + static int ceph_x_encrypt_buflen(int ilen) { return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + @@ -127,7 +138,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, int ret; char *dbuf; char *ticket_buf; - u8 struct_v; + u8 reply_struct_v; dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!dbuf) @@ -139,14 +150,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, goto out_dbuf; ceph_decode_need(&p, end, 1 + sizeof(u32), bad); - struct_v = ceph_decode_8(&p); - if (struct_v != 1) + reply_struct_v = ceph_decode_8(&p); + if (reply_struct_v != 1) goto bad; num = ceph_decode_32(&p); dout("%d tickets\n", num); while (num--) { int type; - u8 struct_v; + u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; void *dp, *dend; int dlen; @@ -165,8 +176,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, type = ceph_decode_32(&p); dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - struct_v = ceph_decode_8(&p); - if (struct_v != 1) + tkt_struct_v = ceph_decode_8(&p); + if (tkt_struct_v != 1) goto bad; th = get_ticket_handler(ac, type); @@ -186,8 +197,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dend = dbuf + dlen; dp = dbuf; - struct_v = ceph_decode_8(&dp); - if (struct_v != 1) + tkt_struct_v = ceph_decode_8(&dp); + if (tkt_struct_v != 1) goto bad; memcpy(&old_key, &th->session_key, sizeof(old_key)); @@ -224,7 +235,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, tpend = tp + dlen; dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - struct_v = ceph_decode_8(&tp); + blob_struct_v = ceph_decode_8(&tp); new_secret_id = ceph_decode_64(&tp); ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); if (ret) @@ -618,7 +629,9 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { + .name = "x", .is_authenticated = ceph_x_is_authenticated, + .should_authenticate = ceph_x_should_authenticate, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d9400534b27..619b61655ee 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; - struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); @@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); - if (IS_ERR(msg)) - return PTR_ERR(msg); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); + if (!msg) + return -ENOMEM; msg->hdr.tid = cpu_to_le64(flush_tid); @@ -980,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } +static void __queue_cap_release(struct ceph_mds_session *session, + u64 ino, u64 cap_id, u32 migrate_seq, + u32 issue_seq) +{ + struct ceph_msg *msg; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + + spin_lock(&session->s_cap_lock); + BUG_ON(!session->s_num_cap_releases); + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + + dout(" adding %llx release to mds%d msg %p (%d left)\n", + ino, session->s_mds, msg, session->s_num_cap_releases); + + BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); + head = msg->front.iov_base; + head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(ino); + item->cap_id = cpu_to_le64(cap_id); + item->migrate_seq = cpu_to_le32(migrate_seq); + item->seq = cpu_to_le32(issue_seq); + + session->s_num_cap_releases--; + + msg->front.iov_len += sizeof(*item); + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + dout(" release msg %p full\n", msg); + list_move_tail(&msg->list_head, &session->s_cap_releases_done); + } else { + dout(" release msg %p at %d/%d (%d)\n", msg, + (int)le32_to_cpu(head->num), + (int)CEPH_CAPS_PER_RELEASE, + (int)msg->front.iov_len); + } + spin_unlock(&session->s_cap_lock); +} + /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_lock. @@ -993,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); struct ceph_mds_session *session = cap->session; - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - spin_lock(&session->s_cap_lock); - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %p release to mds%d msg %p (%d left)\n", - inode, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ceph_ino(inode)); - item->cap_id = cpu_to_le64(cap->cap_id); - item->migrate_seq = cpu_to_le32(cap->mseq); - item->seq = cpu_to_le32(cap->issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, - &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } - spin_unlock(&session->s_cap_lock); + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); p = rb_next(p); __ceph_remove_cap(cap); } @@ -1298,7 +1307,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) */ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { - struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct inode *inode = &ci->vfs_inode; int was = ci->i_dirty_caps; int dirty = 0; @@ -1336,7 +1346,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) static int __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing; @@ -1663,7 +1673,7 @@ ack: static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, unsigned *flush_tid) { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int unlock_session = session ? 0 : 1; int flushing = 0; @@ -1716,10 +1726,9 @@ out_unlocked: static int caps_are_flushed(struct inode *inode, unsigned tid) { struct ceph_inode_info *ci = ceph_inode(inode); - int dirty, i, ret = 1; + int i, ret = 1; spin_lock(&inode->i_lock); - dirty = __ceph_caps_dirty(ci); for (i = 0; i < CEPH_CAP_BITS; i++) if ((ci->i_flushing_caps & (1 << i)) && ci->i_cap_flush_tid[i] <= tid) { @@ -1775,9 +1784,9 @@ out: spin_unlock(&ci->i_unsafe_lock); } -int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) +int ceph_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); unsigned flush_tid; int ret; @@ -1829,7 +1838,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } else { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&inode->i_lock); if (__ceph_caps_dirty(ci)) @@ -2411,7 +2421,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(inode->i_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; @@ -2653,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; int mds = session->s_mds; int op; - u32 seq; + u32 seq, mseq; struct ceph_vino vino; u64 cap_id; u64 size, max_size; @@ -2673,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap = CEPH_NOSNAP; cap_id = le64_to_cpu(h->cap_id); seq = le32_to_cpu(h->seq); + mseq = le32_to_cpu(h->migrate_seq); size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); @@ -2687,6 +2698,18 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap, inode); if (!inode) { dout(" i don't have ino %llx\n", vino.ino); + + if (op == CEPH_CAP_OP_IMPORT) + __queue_cap_release(session, vino.ino, cap_id, + mseq, seq); + + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_add_cap_releases(mdsc, session, -1); + ceph_send_cap_releases(mdsc, session); goto done; } @@ -2712,7 +2735,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&inode->i_lock); cap = __get_cap_for_mds(ceph_inode(inode), mds); if (!cap) { - dout("no cap on %p ino %llx.%llx from mds%d, releasing\n", + dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); goto done; diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 0c2241ef365..2fa992eaf7d 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -19,7 +19,7 @@ * Ceph release version */ #define CEPH_VERSION_MAJOR 0 -#define CEPH_VERSION_MINOR 19 +#define CEPH_VERSION_MINOR 20 #define CEPH_VERSION_PATCH 0 #define _CEPH_STRINGIFY(x) #x @@ -36,7 +36,7 @@ * client-facing protocol. */ #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 9 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ #define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ @@ -53,8 +53,18 @@ /* * feature bits */ -#define CEPH_FEATURE_SUPPORTED 0 -#define CEPH_FEATURE_REQUIRED 0 +#define CEPH_FEATURE_UID 1 +#define CEPH_FEATURE_NOSRCADDR 2 +#define CEPH_FEATURE_FLOCK 4 + +#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK +#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR /* @@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) + /********************************************* * message layer @@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); #define CEPH_MSG_CLIENT_SNAP 0x312 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 +/* pool ops */ +#define CEPH_MSG_POOLOP_REPLY 48 +#define CEPH_MSG_POOLOP 49 + + /* osd */ #define CEPH_MSG_OSD_MAP 41 #define CEPH_MSG_OSD_OP 42 #define CEPH_MSG_OSD_OPREPLY 43 +/* pool operations */ +enum { + POOL_OP_CREATE = 0x01, + POOL_OP_DELETE = 0x02, + POOL_OP_AUID_CHANGE = 0x03, + POOL_OP_CREATE_SNAP = 0x11, + POOL_OP_DELETE_SNAP = 0x12, + POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, + POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, +}; + struct ceph_mon_request_header { __le64 have_version; __le16 session_mon; @@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply { struct ceph_statfs st; } __attribute__ ((packed)); +const char *ceph_pool_op_name(int op); + +struct ceph_mon_poolop { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 pool; + __le32 op; + __le64 auid; + __le64 snapid; + __le32 name_len; +} __attribute__ ((packed)); + +struct ceph_mon_poolop_reply { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 reply_code; + __le32 epoch; + char has_data; + char data[0]; +} __attribute__ ((packed)); + +struct ceph_mon_unmanaged_snap { + __le64 snapid; +} __attribute__ ((packed)); + struct ceph_osd_getmap { struct ceph_mon_request_header monhdr; struct ceph_fsid fsid; @@ -212,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { @@ -308,6 +362,7 @@ union ceph_mds_request_args { struct { __le32 frag; /* which dir fragment */ __le32 max_entries; /* how many dentries to grab */ + __le32 max_bytes; } __attribute__ ((packed)) readdir; struct { __le32 mode; diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 8e4be6a80c6..7503aee828c 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c @@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type) case CEPH_ENTITY_TYPE_OSD: return "osd"; case CEPH_ENTITY_TYPE_MON: return "mon"; case CEPH_ENTITY_TYPE_CLIENT: return "client"; - case CEPH_ENTITY_TYPE_ADMIN: return "admin"; case CEPH_ENTITY_TYPE_AUTH: return "auth"; default: return "unknown"; } @@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; case CEPH_OSD_OP_RMXATTR: return "rmxattr"; + case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; case CEPH_OSD_OP_PULL: return "pull"; case CEPH_OSD_OP_PUSH: return "push"; @@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o) } return "???"; } + +const char *ceph_pool_op_name(int op) +{ + switch (op) { + case POOL_OP_CREATE: return "create"; + case POOL_OP_DELETE: return "delete"; + case POOL_OP_AUID_CHANGE: return "auid change"; + case POOL_OP_CREATE_SNAP: return "create snap"; + case POOL_OP_DELETE_SNAP: return "delete snap"; + case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; + case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; + } + return "???"; +} diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f7048da92ac..3be33fb066c 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p) static int monc_show(struct seq_file *s, void *p) { struct ceph_client *client = s->private; - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; struct ceph_mon_client *monc = &client->monc; struct rb_node *rp; @@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p) if (monc->want_next_osdmap) seq_printf(s, "want next osdmap\n"); - for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { - req = rb_entry(rp, struct ceph_mon_statfs_request, node); - seq_printf(s, "%lld statfs\n", req->tid); + for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { + __u16 op; + req = rb_entry(rp, struct ceph_mon_generic_request, node); + op = le16_to_cpu(req->request->hdr.type); + if (op == CEPH_MSG_STATFS) + seq_printf(s, "%lld statfs\n", req->tid); + else + seq_printf(s, "%lld unknown\n", req->tid); } mutex_unlock(&monc->mutex); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 650d2db5ed2..f85719310db 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry) return -ENOMEM; /* oh well */ spin_lock(&dentry->d_lock); - if (dentry->d_fsdata) /* lost a race */ + if (dentry->d_fsdata) { + /* lost a race */ + kmem_cache_free(ceph_dentry_cachep, di); goto out_unlock; + } di->dentry = dentry; di->lease_session = NULL; dentry->d_fsdata = di; @@ -125,7 +128,8 @@ more: dentry = list_entry(p, struct dentry, d_u.d_child); di = ceph_dentry(dentry); while (1) { - dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, + dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, + d_unhashed(dentry) ? "!hashed" : "hashed", parent->d_subdirs.prev, parent->d_subdirs.next); if (p == &parent->d_subdirs) { fi->at_end = 1; @@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 ftype; struct ceph_mds_reply_info_parsed *rinfo; const int max_entries = client->mount_args->max_readdir; + const int max_bytes = client->mount_args->max_readdir_bytes; dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); if (fi->at_end) @@ -312,6 +317,7 @@ more: req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.max_entries = cpu_to_le32(max_entries); + req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); req->r_num_caps = max_entries + 1; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) { @@ -335,7 +341,7 @@ more: if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; - fi->next_offset = 0; + fi->next_offset = 2; } else { rinfo = &req->r_reply_info; err = note_last_dentry(fi, @@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { - struct ceph_client *client = ceph_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct inode *parent = dentry->d_parent->d_inode; /* .snap dir? */ @@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, !is_root_ceph_dentry(dir, dentry) && (ci->i_ceph_flags & CEPH_I_COMPLETE) && (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { - di->offset = ci->i_max_offset++; spin_unlock(&dir->i_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); @@ -582,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; /* we only need inode linkage */ @@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ - new_dentry->d_time = jiffies; - ceph_dentry(new_dentry)->lease_shared_gen = 0; + ceph_invalidate_dentry_lease(new_dentry); } ceph_mdsc_put_request(req); return err; } +/* + * Ensure a dentry lease will no longer revalidate. + */ +void ceph_invalidate_dentry_lease(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_time = jiffies; + ceph_dentry(dentry)->lease_shared_gen = 0; + spin_unlock(&dentry->d_lock); +} /* * Check if dentry lease is valid. If not, delete the lease. Try to @@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; - dout("d_revalidate %p '%.*s' inode %p\n", dentry, - dentry->d_name.len, dentry->d_name.name, dentry->d_inode); + dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, + dentry->d_name.len, dentry->d_name.name, dentry->d_inode, + ceph_dentry(dentry)->offset); /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { @@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, struct ceph_inode_info *ci = ceph_inode(inode); int left; - if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!cf->dir_info) { @@ -1092,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, * an fsync() on a dir will wait for any uncommitted directory * operations to commit. */ -static int ceph_dir_fsync(st |