aboutsummaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/file.c39
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/locks.c94
-rw-r--r--fs/ceph/mds_client.c41
-rw-r--r--fs/ceph/mds_client.h31
6 files changed, 136 insertions, 78 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7d447af84ec..d902948a90d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
dentry->d_op = &ceph_dentry_ops;
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
dentry->d_op = &ceph_snapdir_dentry_ops;
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp,
spin_lock(&dcache_lock);
/* start at beginning? */
- if (filp->f_pos == 2 || (last &&
- filp->f_pos < ceph_dentry(last)->offset)) {
+ if (filp->f_pos == 2 || last == NULL ||
+ filp->f_pos < ceph_dentry(last)->offset) {
if (list_empty(&parent->d_subdirs))
goto out_unlock;
p = parent->d_subdirs.prev;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b8912e3..7d0e4a82d89 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool align_to_pages)
+ int *checkeof, bool align_to_pages,
+ unsigned long buf_align)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
more:
if (align_to_pages)
- page_align = (pos - io_align) & ~PAGE_MASK;
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
this_len = left;
@@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
struct inode *inode = file->f_dentry->d_inode;
struct page **pages;
u64 off = *poff;
- int num_pages = calc_pages_for(off, len);
- int ret;
+ int num_pages, ret;
dout("sync_read on file %p %llu~%u %s\n", file, off, len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
- if (file->f_flags & O_DIRECT)
- pages = ceph_get_direct_page_vector(data, num_pages);
- else
+ if (file->f_flags & O_DIRECT) {
+ num_pages = calc_pages_for((unsigned long)data, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, true);
+ } else {
+ num_pages = calc_pages_for(off, len);
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ }
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
goto done;
ret = striped_read(inode, off, len, pages, num_pages, checkeof,
- file->f_flags & O_DIRECT);
+ file->f_flags & O_DIRECT,
+ (unsigned long)data & ~PAGE_MASK);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
done:
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, true);
else
ceph_release_page_vector(pages, num_pages);
dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
int do_sync = 0;
int check_caps = 0;
int page_align, io_align;
+ unsigned long buf_align;
int ret;
struct timespec mtime = CURRENT_TIME;
@@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
pos = *offset;
io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
@@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/
more:
len = left;
- if (file->f_flags & O_DIRECT)
+ if (file->f_flags & O_DIRECT) {
/* write from beginning of first page, regardless of
io alignment */
- page_align = (pos - io_align) & ~PAGE_MASK;
- else
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ num_pages = calc_pages_for((unsigned long)data, len);
+ } else {
page_align = pos & ~PAGE_MASK;
+ num_pages = calc_pages_for(pos, len);
+ }
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@ more:
if (!req)
return -ENOMEM;
- num_pages = calc_pages_for(pos, len);
-
if (file->f_flags & O_DIRECT) {
- pages = ceph_get_direct_page_vector(data, num_pages);
+ pages = ceph_get_direct_page_vector(data, num_pages, false);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
@@ -565,7 +572,7 @@ more:
}
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, false);
else if (file->f_flags & O_SYNC)
ceph_release_page_vector(pages, num_pages);
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb..52e8fd74d45 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
#include <linux/ioctl.h>
#include <linux/types.h>
-#define CEPH_IOCTL_MAGIC 0x98
+#define CEPH_IOCTL_MAGIC 0x97
/* just use u64 to align sanely on all archs */
struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c34..476b329867d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
- u64 pid, u64 pid_ns,
- int cmd, u64 start, u64 length, u8 wait)
+ int cmd, u8 wait, struct file_lock *fl)
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
int err;
+ u64 length = 0;
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
req->r_inode = igrab(inode);
+ /* mds requires start and length rather than start and end */
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
"length: %llu, wait: %d, type`: %d", (int)lock_type,
- (int)operation, pid, start, length, wait, cmd);
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type);
+
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
- req->r_args.filelock_change.pid = cpu_to_le64(pid);
+ req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
/* This should be adjusted, but I'm not sure if
namespaces actually get id numbers*/
req->r_args.filelock_change.pid_namespace =
- cpu_to_le64((u64)pid_ns);
- req->r_args.filelock_change.start = cpu_to_le64(start);
+ cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
+ req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
err = ceph_mdsc_do_request(mdsc, inode, req);
+
+ if ( operation == CEPH_MDS_OP_GETFILELOCK){
+ fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_RDLCK;
+ else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_WRLCK;
+ else
+ fl->fl_type = F_UNLCK;
+
+ fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+ length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+ le64_to_cpu(req->r_reply_info.filelock_reply->length);
+ if (length >= 1)
+ fl->fl_end = length -1;
+ else
+ fl->fl_end = 0;
+
+ }
ceph_mdsc_put_request(req);
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
- (int)operation, pid, start, length, wait, cmd, err);
+ "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type, err);
return err;
}
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
*/
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
- u64 length;
u8 lock_cmd;
int err;
u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else
lock_cmd = CEPH_LOCK_UNLOCK;
- if (LLONG_MAX == fl->fl_end)
- length = 0;
- else
- length = fl->fl_end - fl->fl_start + 1;
-
- err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- lock_cmd, fl->fl_start,
- length, wait);
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
if (!err) {
- dout("mds locked, locking locally");
- err = posix_lock_file(file, fl, NULL);
- if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
- /* undo! This should only happen if the kernel detects
- * local deadlock. */
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- CEPH_LOCK_UNLOCK, fl->fl_start,
- length, 0);
- dout("got %d on posix_lock_file, undid lock", err);
+ if ( op != CEPH_MDS_OP_GETFILELOCK ){
+ dout("mds locked, locking locally");
+ err = posix_lock_file(file, fl, NULL);
+ if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ /* undo! This should only happen if the kernel detects
+ * local deadlock. */
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on posix_lock_file, undid lock", err);
+ }
}
+
} else {
dout("mds returned error code %d", err);
}
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
- u64 length;
u8 lock_cmd;
int err;
u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
lock_cmd = CEPH_LOCK_EXCL;
else
lock_cmd = CEPH_LOCK_UNLOCK;
- /* mds requires start and length rather than start and end */
- if (LLONG_MAX == fl->fl_end)
- length = 0;
- else
- length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- lock_cmd, fl->fl_start,
- length, wait);
+ file, lock_cmd, wait, fl);
if (!err) {
err = flock_lock_file_wait(file, fl);
if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- CEPH_LOCK_UNLOCK, fl->fl_start,
- length, 0);
+ file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
} else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 098b1850847..38800eaa81d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -202,6 +202,38 @@ out_bad:
}
/*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info)
+{
+ if (*p + sizeof(*info->filelock_reply) > end)
+ goto bad;
+
+ info->filelock_reply = *p;
+ *p += sizeof(*info->filelock_reply);
+
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info)
+{
+ if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+ return parse_reply_info_filelock(p, end, info);
+ else
+ return parse_reply_info_dir(p, end, info);
+}
+
+/*
* parse entire mds reply
*/
static int parse_reply_info(struct ceph_msg *msg,
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
goto out_bad;
}
- /* dir content */
+ /* extra */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
- err = parse_reply_info_dir(&p, p+len, info);
+ err = parse_reply_info_extra(&p, p+len, info);
if (err < 0)
goto out_bad;
}
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&session->s_mutex);
if (err < 0) {
- pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
+ pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
ceph_msg_dump(msg);
goto out_err;
}
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&req->r_fill_mutex);
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
- if (result == 0 && rinfo->dir_nr)
+ if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+ rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 9341fd4f143..aabe563b54d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
};
/*
- * parsed info about an mds reply, including information about the
- * target inode and/or its parent directory and dentry, and directory
- * contents (for readdir results).
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
*/
struct ceph_mds_reply_info_parsed {
struct ceph_mds_reply_head *head;
+ /* trace */
struct ceph_mds_reply_info_in diri, targeti;
struct ceph_mds_reply_dirfrag *dirfrag;
char *dname;
u32 dname_len;
struct ceph_mds_reply_lease *dlease;
- struct ceph_mds_reply_dirfrag *dir_dir;
- int dir_nr;
- char **dir_dname;
- u32 *dir_dname_len;
- struct ceph_mds_reply_lease **dir_dlease;
- struct ceph_mds_reply_info_in *dir_in;
- u8 dir_complete, dir_end;
+ /* extra */
+ union {
+ /* for fcntl F_GETLK results */
+ struct ceph_filelock *filelock_reply;
+
+ /* for readdir results */
+ struct {
+ struct ceph_mds_reply_dirfrag *dir_dir;
+ int dir_nr;
+ char **dir_dname;
+ u32 *dir_dname_len;
+ struct ceph_mds_reply_lease **dir_dlease;
+ struct ceph_mds_reply_info_in *dir_in;
+ u8 dir_complete, dir_end;
+ };
+ };
/* encoded blob describing snapshot contexts for certain
operations (e.g., open) */