diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/dir.c | 7 | ||||
-rw-r--r-- | fs/ceph/file.c | 39 | ||||
-rw-r--r-- | fs/ceph/ioctl.h | 2 | ||||
-rw-r--r-- | fs/ceph/locks.c | 94 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 41 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 31 |
6 files changed, 136 insertions, 78 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 7d447af84ec..d902948a90d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ + ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) dentry->d_op = &ceph_dentry_ops; else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) dentry->d_op = &ceph_snapdir_dentry_ops; @@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp, spin_lock(&dcache_lock); /* start at beginning? */ - if (filp->f_pos == 2 || (last && - filp->f_pos < ceph_dentry(last)->offset)) { + if (filp->f_pos == 2 || last == NULL || + filp->f_pos < ceph_dentry(last)->offset) { if (list_empty(&parent->d_subdirs)) goto out_unlock; p = parent->d_subdirs.prev; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8d79b8912e3..7d0e4a82d89 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file) static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, - int *checkeof, bool align_to_pages) + int *checkeof, bool align_to_pages, + unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); @@ -307,7 +308,7 @@ static int striped_read(struct inode *inode, more: if (align_to_pages) - page_align = (pos - io_align) & ~PAGE_MASK; + page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; this_len = left; @@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, struct inode *inode = file->f_dentry->d_inode; struct page **pages; u64 off = *poff; - int num_pages = calc_pages_for(off, len); - int ret; + int num_pages, ret; dout("sync_read on file %p %llu~%u %s\n", file, off, len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); - if (file->f_flags & O_DIRECT) - pages = ceph_get_direct_page_vector(data, num_pages); - else + if (file->f_flags & O_DIRECT) { + num_pages = calc_pages_for((unsigned long)data, len); + pages = ceph_get_direct_page_vector(data, num_pages, true); + } else { + num_pages = calc_pages_for(off, len); pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + } if (IS_ERR(pages)) return PTR_ERR(pages); @@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, goto done; ret = striped_read(inode, off, len, pages, num_pages, checkeof, - file->f_flags & O_DIRECT); + file->f_flags & O_DIRECT, + (unsigned long)data & ~PAGE_MASK); if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) ret = ceph_copy_page_vector_to_user(pages, data, off, ret); @@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, done: if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, true); else ceph_release_page_vector(pages, num_pages); dout("sync_read result %d\n", ret); @@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, int do_sync = 0; int check_caps = 0; int page_align, io_align; + unsigned long buf_align; int ret; struct timespec mtime = CURRENT_TIME; @@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, pos = *offset; io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) @@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, */ more: len = left; - if (file->f_flags & O_DIRECT) + if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of io alignment */ - page_align = (pos - io_align) & ~PAGE_MASK; - else + page_align = (pos - io_align + buf_align) & ~PAGE_MASK; + num_pages = calc_pages_for((unsigned long)data, len); + } else { page_align = pos & ~PAGE_MASK; + num_pages = calc_pages_for(pos, len); + } req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), pos, &len, CEPH_OSD_OP_WRITE, flags, @@ -512,10 +521,8 @@ more: if (!req) return -ENOMEM; - num_pages = calc_pages_for(pos, len); - if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, false); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -565,7 +572,7 @@ more: } if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, false); else if (file->f_flags & O_SYNC) ceph_release_page_vector(pages, num_pages); diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index a6ce54e94eb..52e8fd74d45 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h @@ -4,7 +4,7 @@ #include <linux/ioctl.h> #include <linux/types.h> -#define CEPH_IOCTL_MAGIC 0x98 +#define CEPH_IOCTL_MAGIC 0x97 /* just use u64 to align sanely on all archs */ struct ceph_ioctl_layout { diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 40abde93c34..476b329867d 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -11,40 +11,68 @@ * Implement fcntl and flock locking functions. */ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, - u64 pid, u64 pid_ns, - int cmd, u64 start, u64 length, u8 wait) + int cmd, u8 wait, struct file_lock *fl) { struct inode *inode = file->f_dentry->d_inode; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; + u64 length = 0; req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); req->r_inode = igrab(inode); + /* mds requires start and length rather than start and end */ + if (LLONG_MAX == fl->fl_end) + length = 0; + else + length = fl->fl_end - fl->fl_start + 1; + dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " "length: %llu, wait: %d, type`: %d", (int)lock_type, - (int)operation, pid, start, length, wait, cmd); + (int)operation, (u64)fl->fl_pid, fl->fl_start, + length, wait, fl->fl_type); + req->r_args.filelock_change.rule = lock_type; req->r_args.filelock_change.type = cmd; - req->r_args.filelock_change.pid = cpu_to_le64(pid); + req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); /* This should be adjusted, but I'm not sure if namespaces actually get id numbers*/ req->r_args.filelock_change.pid_namespace = - cpu_to_le64((u64)pid_ns); - req->r_args.filelock_change.start = cpu_to_le64(start); + cpu_to_le64((u64)(unsigned long)fl->fl_nspid); + req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); req->r_args.filelock_change.length = cpu_to_le64(length); req->r_args.filelock_change.wait = wait; err = ceph_mdsc_do_request(mdsc, inode, req); + + if ( operation == CEPH_MDS_OP_GETFILELOCK){ + fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); + if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) + fl->fl_type = F_RDLCK; + else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) + fl->fl_type = F_WRLCK; + else + fl->fl_type = F_UNLCK; + + fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); + length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + + le64_to_cpu(req->r_reply_info.filelock_reply->length); + if (length >= 1) + fl->fl_end = length -1; + else + fl->fl_end = 0; + + } ceph_mdsc_put_request(req); dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, - (int)operation, pid, start, length, wait, cmd, err); + "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, + (int)operation, (u64)fl->fl_pid, fl->fl_start, + length, wait, fl->fl_type, err); return err; } @@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, */ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { - u64 length; u8 lock_cmd; int err; u8 wait = 0; @@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else lock_cmd = CEPH_LOCK_UNLOCK; - if (LLONG_MAX == fl->fl_end) - length = 0; - else - length = fl->fl_end - fl->fl_start + 1; - - err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - (u64)fl->fl_pid, - (u64)(unsigned long)fl->fl_nspid, - lock_cmd, fl->fl_start, - length, wait); + err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); if (!err) { - dout("mds locked, locking locally"); - err = posix_lock_file(file, fl, NULL); - if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { - /* undo! This should only happen if the kernel detects - * local deadlock. */ - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, - (u64)fl->fl_pid, - (u64)(unsigned long)fl->fl_nspid, - CEPH_LOCK_UNLOCK, fl->fl_start, - length, 0); - dout("got %d on posix_lock_file, undid lock", err); + if ( op != CEPH_MDS_OP_GETFILELOCK ){ + dout("mds locked, locking locally"); + err = posix_lock_file(file, fl, NULL); + if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { + /* undo! This should only happen if the kernel detects + * local deadlock. */ + ceph_lock_message(CEPH_LOCK_FCNTL, op, file, + CEPH_LOCK_UNLOCK, 0, fl); + dout("got %d on posix_lock_file, undid lock", err); + } } + } else { dout("mds returned error code %d", err); } @@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { - u64 length; u8 lock_cmd; int err; u8 wait = 1; @@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) lock_cmd = CEPH_LOCK_EXCL; else lock_cmd = CEPH_LOCK_UNLOCK; - /* mds requires start and length rather than start and end */ - if (LLONG_MAX == fl->fl_end) - length = 0; - else - length = fl->fl_end - fl->fl_start + 1; err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, (u64)fl->fl_pid, - (u64)(unsigned long)fl->fl_nspid, - lock_cmd, fl->fl_start, - length, wait); + file, lock_cmd, wait, fl); if (!err) { err = flock_lock_file_wait(file, fl); if (err) { ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, (u64)fl->fl_pid, - (u64)(unsigned long)fl->fl_nspid, - CEPH_LOCK_UNLOCK, fl->fl_start, - length, 0); + file, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on flock_lock_file_wait, undid lock", err); } } else { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 098b1850847..38800eaa81d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -202,6 +202,38 @@ out_bad: } /* + * parse fcntl F_GETLK results + */ +static int parse_reply_info_filelock(void **p, void *end, + struct ceph_mds_reply_info_parsed *info) +{ + if (*p + sizeof(*info->filelock_reply) > end) + goto bad; + + info->filelock_reply = *p; + *p += sizeof(*info->filelock_reply); + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* + * parse extra results + */ +static int parse_reply_info_extra(void **p, void *end, + struct ceph_mds_reply_info_parsed *info) +{ + if (info->head->op == CEPH_MDS_OP_GETFILELOCK) + return parse_reply_info_filelock(p, end, info); + else + return parse_reply_info_dir(p, end, info); +} + +/* * parse entire mds reply */ static int parse_reply_info(struct ceph_msg *msg, @@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg, goto out_bad; } - /* dir content */ + /* extra */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { - err = parse_reply_info_dir(&p, p+len, info); + err = parse_reply_info_extra(&p, p+len, info); if (err < 0) goto out_bad; } @@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&session->s_mutex); if (err < 0) { - pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); + pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); ceph_msg_dump(msg); goto out_err; } @@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && rinfo->dir_nr) + if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 9341fd4f143..aabe563b54d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in { }; /* - * parsed info about an mds reply, including information about the - * target inode and/or its parent directory and dentry, and directory - * contents (for readdir results). + * parsed info about an mds reply, including information about + * either: 1) the target inode and/or its parent directory and dentry, + * and directory contents (for readdir results), or + * 2) the file range lock info (for fcntl F_GETLK results). */ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_head *head; + /* trace */ struct ceph_mds_reply_info_in diri, targeti; struct ceph_mds_reply_dirfrag *dirfrag; char *dname; u32 dname_len; struct ceph_mds_reply_lease *dlease; - struct ceph_mds_reply_dirfrag *dir_dir; - int dir_nr; - char **dir_dname; - u32 *dir_dname_len; - struct ceph_mds_reply_lease **dir_dlease; - struct ceph_mds_reply_info_in *dir_in; - u8 dir_complete, dir_end; + /* extra */ + union { + /* for fcntl F_GETLK results */ + struct ceph_filelock *filelock_reply; + + /* for readdir results */ + struct { + struct ceph_mds_reply_dirfrag *dir_dir; + int dir_nr; + char **dir_dname; + u32 *dir_dname_len; + struct ceph_mds_reply_lease **dir_dlease; + struct ceph_mds_reply_info_in *dir_in; + u8 dir_complete, dir_end; + }; + }; /* encoded blob describing snapshot contexts for certain operations (e.g., open) */ |