about summary refs log tree commit diff
path: root/fs/fuse
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse')
-rw-r--r-- fs/fuse/control.c 2
-rw-r--r-- fs/fuse/cuse.c 13
-rw-r--r-- fs/fuse/dev.c 53
-rw-r--r-- fs/fuse/dir.c 206
-rw-r--r-- fs/fuse/file.c 495
-rw-r--r-- fs/fuse/fuse_i.h 29
-rw-r--r-- fs/fuse/inode.c 65
7 files changed, 612 insertions, 251 deletions
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a0b0855d00a..205e0d5d530 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -348,7 +348,7 @@ int __init fuse_ctl_init(void)
return register_filesystem(&fuse_ctl_fs_type);
}
-void fuse_ctl_cleanup(void)
+void __exit fuse_ctl_cleanup(void)
{
unregister_filesystem(&fuse_ctl_fs_type);
}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b96a49b37d6..966ace8b243 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -94,8 +94,10 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
loff_t pos = 0;
struct iovec iov = { .iov_base = buf, .iov_len = count };
struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, READ, &iov, 1, count);
- return fuse_direct_io(&io, &iov, 1, count, &pos, 0);
+ return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
}
static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -104,12 +106,15 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
loff_t pos = 0;
struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, WRITE, &iov, 1, count);
/*
* No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks.
*/
- return fuse_direct_io(&io, &iov, 1, count, &pos, 1);
+ return fuse_direct_io(&io, &ii, &pos,
+ FUSE_DIO_WRITE | FUSE_DIO_CUSE);
}
static int cuse_open(struct inode *inode, struct file *file)
@@ -568,7 +573,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev,
return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
}
-static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL);
+static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL);
static ssize_t cuse_class_abort_store(struct device *dev,
struct device_attribute *attr,
@@ -579,7 +584,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
fuse_abort_conn(&cc->fc);
return count;
}
-static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store);
+static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
static struct attribute *cuse_class_dev_attrs[] = {
&dev_attr_waiting.attr,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0a648bb455a..ca887314aba 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
unsigned long seglen;
unsigned long addr;
struct page *pg;
- void *mapaddr;
- void *buf;
unsigned len;
+ unsigned offset;
unsigned move_pages:1;
};
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
- if (!cs->write) {
- buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
- } else {
- kunmap(buf->page);
+ if (cs->write)
buf->len = PAGE_SIZE - cs->len;
- }
cs->currbuf = NULL;
- cs->mapaddr = NULL;
- } else if (cs->mapaddr) {
- kunmap(cs->pg);
+ } else if (cs->pg) {
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
}
put_page(cs->pg);
- cs->mapaddr = NULL;
}
+ cs->pg = NULL;
}
/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
*/
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
- unsigned long offset;
+ struct page *page;
int err;
unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
cs->len = buf->len;
- cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
cs->nr_segs--;
} else {
- struct page *page;
-
if (cs->nr_segs == cs->pipe->buffers)
return -EIO;
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap(page);
- cs->buf = cs->mapaddr;
+ cs->pg = page;
+ cs->offset = 0;
cs->len = PAGE_SIZE;
cs->pipebufs++;
cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->iov++;
cs->nr_segs--;
}
- err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
if (err < 0)
return err;
BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap(cs->pg);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->pg = page;
+ cs->offset = cs->addr % PAGE_SIZE;
+ cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
cs->seglen -= cs->len;
cs->addr += cs->len;
}
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
unsigned ncpy = min(*size, cs->len);
if (val) {
+ void *pgaddr = kmap_atomic(cs->pg);
+ void *buf = pgaddr + cs->offset;
+
if (cs->write)
- memcpy(cs->buf, *val, ncpy);
+ memcpy(buf, *val, ncpy);
else
- memcpy(*val, cs->buf, ncpy);
+ memcpy(*val, buf, ncpy);
+
+ kunmap_atomic(pgaddr);
*val += ncpy;
}
*size -= ncpy;
cs->len -= ncpy;
- cs->buf += ncpy;
+ cs->offset += ncpy;
return ncpy;
}
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
out_fallback_unlock:
unlock_page(newpage);
out_fallback:
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
- cs->buf = cs->mapaddr + buf->offset;
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
err = lock_request(cs->fc, cs->req);
if (err)
@@ -1614,7 +1609,7 @@ out_finish:
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
- release_pages(req->pages, req->num_pages, 0);
+ release_pages(req->pages, req->num_pages, false);
}
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 1d1292c581c..0c6048247a3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
goto invalid;
- else if (fuse_dentry_time(entry) < get_jiffies_64()) {
+ else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+ (flags & LOOKUP_REVAL)) {
int err;
struct fuse_entry_out outarg;
struct fuse_req *req;
@@ -679,6 +680,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
return create_new_entry(fc, req, dir, entry, S_IFLNK);
}
+static inline void fuse_update_ctime(struct inode *inode)
+{
+ if (!IS_NOCMTIME(inode)) {
+ inode->i_ctime = current_fs_time(inode->i_sb);
+ mark_inode_dirty_sync(inode);
+ }
+}
+
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
@@ -713,6 +722,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir);
fuse_invalidate_entry_cache(entry);
+ fuse_update_ctime(inode);
} else if (err == -EINTR)
fuse_invalidate_entry(entry);
return err;
@@ -743,23 +753,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
return err;
}
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent)
+static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags, int opcode, size_t argsize)
{
int err;
- struct fuse_rename_in inarg;
+ struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
+ struct fuse_req *req;
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
- memset(&inarg, 0, sizeof(inarg));
+ memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
- req->in.h.opcode = FUSE_RENAME;
+ inarg.flags = flags;
+ req->in.h.opcode = opcode;
req->in.h.nodeid = get_node_id(olddir);
req->in.numargs = 3;
- req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].size = argsize;
req->in.args[0].value = &inarg;
req->in.args[1].size = oldent->d_name.len + 1;
req->in.args[1].value = oldent->d_name.name;
@@ -771,15 +784,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
if (!err) {
/* ctime changes */
fuse_invalidate_attr(oldent->d_inode);
+ fuse_update_ctime(oldent->d_inode);
+
+ if (flags & RENAME_EXCHANGE) {
+ fuse_invalidate_attr(newent->d_inode);
+ fuse_update_ctime(newent->d_inode);
+ }
fuse_invalidate_attr(olddir);
if (olddir != newdir)
fuse_invalidate_attr(newdir);
/* newent will end up negative */
- if (newent->d_inode) {
+ if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
fuse_invalidate_attr(newent->d_inode);
fuse_invalidate_entry_cache(newent);
+ fuse_update_ctime(newent->d_inode);
}
} else if (err == -EINTR) {
/* If request was interrupted, DEITY only knows if the
@@ -795,6 +815,42 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
return err;
}
+static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ int err;
+
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (flags) {
+ if (fc->no_rename2 || fc->minor < 23)
+ return -EINVAL;
+
+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ FUSE_RENAME2,
+ sizeof(struct fuse_rename2_in));
+ if (err == -ENOSYS) {
+ fc->no_rename2 = 1;
+ err = -EINVAL;
+ }
+ } else {
+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ FUSE_RENAME,
+ sizeof(struct fuse_rename_in));
+ }
+
+ return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ return fuse_rename2(olddir, oldent, newdir, newent, 0);
+}
+
static int fuse_link(struct dentry *entry, struct inode *newdir,
struct dentry *newent)
{
@@ -829,6 +885,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
inc_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
} else if (err == -EINTR) {
fuse_invalidate_attr(inode);
}
@@ -839,6 +896,16 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
struct kstat *stat)
{
unsigned int blkbits;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /* see the comment in fuse_change_attributes() */
+ if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
+ attr->size = i_size_read(inode);
+ attr->mtime = inode->i_mtime.tv_sec;
+ attr->mtimensec = inode->i_mtime.tv_nsec;
+ attr->ctime = inode->i_ctime.tv_sec;
+ attr->ctimensec = inode->i_ctime.tv_nsec;
+ }
stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino;
@@ -925,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
int err;
bool r;
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
err = fuse_do_getattr(inode, stat, file);
} else {
@@ -1111,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
refreshed = true;
err = fuse_perm_getattr(inode, mask);
@@ -1477,12 +1544,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
}
-static bool update_mtime(unsigned ivalid)
+static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
/* Always update if mtime is explicitly set */
if (ivalid & ATTR_MTIME_SET)
return true;
+ /* Or if kernel i_mtime is the official one */
+ if (trust_local_mtime)
+ return true;
+
/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
return false;
@@ -1491,7 +1562,8 @@ static bool update_mtime(unsigned ivalid)
return true;
}
-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
+static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
+ bool trust_local_cmtime)
{
unsigned ivalid = iattr->ia_valid;
@@ -1510,13 +1582,18 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
if (!(ivalid & ATTR_ATIME_SET))
arg->valid |= FATTR_ATIME_NOW;
}
- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
arg->valid |= FATTR_MTIME;
arg->mtime = iattr->ia_mtime.tv_sec;
arg->mtimensec = iattr->ia_mtime.tv_nsec;
- if (!(ivalid & ATTR_MTIME_SET))
+ if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
arg->valid |= FATTR_MTIME_NOW;
}
+ if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
+ arg->valid |= FATTR_CTIME;
+ arg->ctime = iattr->ia_ctime.tv_sec;
+ arg->ctimensec = iattr->ia_ctime.tv_nsec;
+ }
}
/*
@@ -1563,6 +1640,62 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+ struct inode *inode,
+ struct fuse_setattr_in *inarg_p,
+ struct fuse_attr_out *outarg_p)
+{
+ req->in.h.opcode = FUSE_SETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*inarg_p);
+ req->in.args[0].value = inarg_p;
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(*outarg_p);
+ req->out.args[0].value = outarg_p;
+}
+
+/*
+ * Flush inode->i_mtime (and inode->i_ctime when fc->minor >= 23) to the server
+ */
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_setattr_in inarg;
+ struct fuse_attr_out outarg;
+ int err;
+
+ req = fuse_get_req_nopages(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+
+ inarg.valid = FATTR_MTIME;
+ inarg.mtime = inode->i_mtime.tv_sec;
+ inarg.mtimensec = inode->i_mtime.tv_nsec;
+ if (fc->minor >= 23) {
+ inarg.valid |= FATTR_CTIME;
+ inarg.ctime = inode->i_ctime.tv_sec;
+ inarg.ctimensec = inode->i_ctime.tv_nsec;
+ }
+ if (ff) {
+ inarg.valid |= FATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+
+ return err;
+}
+
/*
* Set attributes, and at the same time refresh them.
*
@@ -1580,8 +1713,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
+ bool is_wb = fc->writeback_cache;
loff_t oldsize;
int err;
+ bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
attr->ia_valid |= ATTR_FORCE;
@@ -1606,11 +1741,13 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+ if (trust_local_cmtime && attr->ia_size != inode->i_size)
+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
}
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
- iattr_to_fattr(attr, &inarg);
+ iattr_to_fattr(attr, &inarg, trust_local_cmtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
@@ -1621,17 +1758,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
- req->in.h.opcode = FUSE_SETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
- else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -1648,10 +1775,21 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
}
spin_lock(&fc->lock);
+ /* the kernel maintains i_mtime and i_ctime locally */
+ if (trust_local_cmtime) {
+ if (attr->ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (attr->ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
+ /* FIXME: clear I_DIRTY_SYNC? */
+ }
+
fuse_change_attributes_common(inode, &outarg.attr,
attr_timeout(&outarg));
oldsize = inode->i_size;
- i_size_write(inode, outarg.attr.size);
+ /* see the comment in fuse_change_attributes() */
+ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
+ i_size_write(inode, outarg.attr.size);
if (is_truncate) {
/* NOTE: this may release/reacquire fc->lock */
@@ -1663,7 +1801,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
* Only call invalidate_inode_pages2() after removing
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
- if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+ if ((is_truncate || !is_wb) &&
+ S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
@@ -1739,8 +1878,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
}
- if (!err)
+ if (!err) {
fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
return err;
}
@@ -1870,8 +2011,10 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
fc->no_removexattr = 1;
err = -EOPNOTSUPP;
}
- if (!err)
+ if (!err) {
fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
return err;
}
@@ -1882,6 +2025,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
.unlink = fuse_unlink,
.rmdir = fuse_rmdir,
.rename = fuse_rename,
+ .rename2 = fuse_rename2,
.link = fuse_link,
.setattr = fuse_setattr,
.create = fuse_create,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 77bcc303c3a..40ac2628ddc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
}
EXPORT_SYMBOL_GPL(fuse_do_open);
+static void fuse_link_write_file(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff = file->private_data;
+ /*
+ * file may be written through mmap, so chain it onto the
+ * inode's write_files list
+ */
+ spin_lock(&fc->lock);
+ if (list_empty(&ff->write_entry))
+ list_add(&ff->write_entry, &fi->write_files);
+ spin_unlock(&fc->lock);
+}
+
void fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
@@ -207,25 +223,37 @@ void fuse_finish_open(struct inode *inode, struct file *file)
i_size_write(inode, 0);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
+ if (fc->writeback_cache)
+ file_update_time(file);
}
+ if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+ fuse_link_write_file(file);
}
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
+ bool lock_inode = (file->f_flags & O_TRUNC) &&
+ fc->atomic_o_trunc &&
+ fc->writeback_cache;
err = generic_file_open(inode, file);
if (err)
return err;
+ if (lock_inode)
+ mutex_lock(&inode->i_mutex);
+
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
- if (err)
- return err;
- fuse_finish_open(inode, file);
+ if (!err)
+ fuse_finish_open(inode, file);
- return 0;
+ if (lock_inode)
+ mutex_unlock(&inode->i_mutex);
+
+ return err;
}
static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
@@ -292,6 +320,12 @@ static int fuse_open(struct inode *inode, struct file *file)
static int fuse_release(struct inode *inode, struct file *file)
{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /* see fuse_vma_close() for !writeback_cache case */
+ if (fc->writeback_cache)
+ write_inode_now(inode, 1);
+
fuse_release_common(file, FUSE_RELEASE);
/* return value is ignored by VFS */
@@ -333,12 +367,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
}
/*
- * Check if page is under writeback
+ * Check if any page in a range is under writeback
*
* This is currently done by walking the list of writepage requests
* for the inode, which can be pretty inefficient.
*/
-static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
+ pgoff_t idx_to)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -351,8 +386,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (curr_index <= index &&
- index < curr_index + req->num_pages) {
+ if (idx_from < curr_index + req->num_pages &&
+ curr_index <= idx_to) {
found = true;
break;
}
@@ -362,6 +397,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
return found;
}
+static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+{
+ return fuse_range_is_writeback(inode, index, index);
+}
+
/*
* Wait for page writeback to be completed.
*
@@ -376,6 +416,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
return 0;
}
+/*
+ * Wait for all pending writepages on the inode to finish.
+ *
+ * This is currently done by blocking further writes with FUSE_NOWRITE
+ * and waiting for all sent writes to complete.
+ *
+ * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
+ * could conflict with truncation.
+ */
+static void fuse_sync_writes(struct inode *inode)
+{
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+}
+
static int fuse_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file_inode(file);
@@ -391,6 +446,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (fc->no_flush)
return 0;
+ err = write_inode_now(inode, 1);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
+ fuse_sync_writes(inode);
+ mutex_unlock(&inode->i_mutex);
+
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -411,21 +474,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err;
}
-/*
- * Wait for all pending writepages on the inode to finish.
- *
- * This is currently done by blocking further writes with FUSE_NOWRITE
- * and waiting for all sent writes to complete.
- *
- * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
- * could conflict with truncation.
- */
-static void fuse_sync_writes(struct inode *inode)
-{
- fuse_set_nowrite(inode);
- fuse_release_nowrite(inode);
-}
-
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int isdir)
{
@@ -439,13 +487,6 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if (is_bad_inode(inode))
return -EIO;
- err = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (err)
- return err;
-
- if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
- return 0;
-
mutex_lock(&inode->i_mutex);
/*
@@ -453,11 +494,17 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
* wait for all outstanding writes, before sending the FSYNC
* request.
*/
- err = write_inode_now(inode, 0);
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
goto out;
fuse_sync_writes(inode);
+ err = sync_inode_metadata(inode, 1);
+ if (err)
+ goto out;
+
+ if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+ goto out;
req = fuse_get_req_nopages(fc);
if (IS_ERR(req)) {
@@ -655,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
spin_unlock(&fc->lock);
}
-static int fuse_readpage(struct file *file, struct page *page)
+static void fuse_short_read(struct fuse_req *req, struct inode *inode,
+ u64 attr_ver)
+{
+ size_t num_read = req->out.args[0].size;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (fc->writeback_cache) {
+ /*
+ * A hole in a file. Some data after the hole are in page cache,
+ * but have not reached the client fs yet. So, the hole is not
+ * present there.
+ */
+ int i;
+ int start_idx = num_read >> PAGE_CACHE_SHIFT;
+ size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+
+ for (i = start_idx; i < req->num_pages; i++) {
+ zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
+ off = 0;
+ }
+ } else {
+ loff_t pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos, attr_ver);
+ }
+}
+
+static int fuse_do_readpage(struct file *file, struct page *page)
{
struct fuse_io_priv io = { .async = 0, .file = file };
struct inode *inode = page->mapping->host;
@@ -667,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page)
u64 attr_ver;
int err;
- err = -EIO;
- if (is_bad_inode(inode))
- goto out;
-
/*
* Page writeback can extend beyond the lifetime of the
* page-cache page, so make sure we read a properly synced
@@ -679,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page)
fuse_wait_on_page_writeback(inode, page->index);
req = fuse_get_req(fc, 1);
- err = PTR_ERR(req);
if (IS_ERR(req))
- goto out;
+ return PTR_ERR(req);
attr_ver = fuse_get_attr_version(fc);
@@ -692,18 +760,32 @@ static int fuse_readpage(struct file *file, struct page *page)
req->page_descs[0].length = count;
num_read = fuse_send_read(req, &io, pos, count, NULL);
err = req->out.h.error;
- fuse_put_request(fc, req);
if (!err) {
/*
* Short read means EOF. If file size is larger, truncate it
*/
if (num_read < count)
- fuse_read_update_size(inode, pos + num_read, attr_ver);
+ fuse_short_read(req, inode, attr_ver);
SetPageUptodate(page);
}
+ fuse_put_request(fc, req);
+
+ return err;
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ err = fuse_do_readpage(file, page);
fuse_invalidate_atime(inode);
out:
unlock_page(page);
@@ -726,13 +808,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
/*
* Short read means EOF. If file size is larger, truncate it
*/
- if (!req->out.h.error && num_read < count) {
- loff_t pos;
+ if (!req->out.h.error && num_read < count)
+ fuse_short_read(req, inode, req->misc.read.attr_ver);
- pos = page_offset(req->pages[0]) + num_read;
- fuse_read_update_size(inode, pos,
- req->misc.read.attr_ver);
- }
fuse_invalidate_atime(inode);
}
@@ -855,8 +933,7 @@ out:
return err;
}
-static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -867,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
* i_size is up to date).
*/
if (fc->auto_inval_data ||
- (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
+ (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
int err;
err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
if (err)
return err;
}
- return generic_file_aio_read(iocb, iov, nr_segs, pos);
+ return generic_file_read_iter(iocb, to);
}
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -922,16 +999,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
return req->misc.write.out.size;
}
-void fuse_write_update_size(struct inode *inode, loff_t pos)
+bool fuse_write_update_size(struct inode *inode, loff_t pos)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ bool ret = false;
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- if (pos > inode->i_size)
+ if (pos > inode->i_size) {
i_size_write(inode, pos);
+ ret = true;
+ }
spin_unlock(&fc->lock);
+
+ return ret;
}
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
@@ -1003,13 +1085,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- pagefault_disable();
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
- pagefault_enable();
flush_dcache_page(page);
- mark_page_accessed(page);
-
if (!tmp) {
unlock_page(page);
page_cache_release(page);
@@ -1102,28 +1180,27 @@ static ssize_t fuse_perform_write(struct file *file,
return res > 0 ? res : err;
}
-static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count = 0;
- size_t ocount = 0;
+ size_t count = iov_iter_count(from);
ssize_t written = 0;
ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
- struct iov_iter i;
loff_t endbyte = 0;
+ loff_t pos = iocb->ki_pos;
- WARN_ON(iocb->ki_pos != pos);
+ if (get_fuse_conn(inode)->writeback_cache) {
+ /* Update size (EOF optimization) and mode (SUID clearing) */
+ err = fuse_update_attributes(mapping->host, NULL, file, NULL);
+ if (err)
+ return err;
- ocount = 0;
- err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
- if (err)
- return err;
+ return generic_file_write_iter(iocb, from);
+ }
- count = ocount;
mutex_lock(&inode->i_mutex);
/* We can write back this queue in page reclaim */
@@ -1136,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
+ iov_iter_truncate(from, count);
err = file_remove_suid(file);
if (err)
goto out;
@@ -1145,17 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
goto out;
if (file->f_flags & O_DIRECT) {
- written = generic_file_direct_write(iocb, iov, &nr_segs,
- pos, &iocb->ki_pos,
- count, ocount);
- if (written < 0 || written == count)
+ written = generic_file_direct_write(iocb, from, pos);
+ if (written < 0 || !iov_iter_count(from))
goto out;
pos += written;
- count -= written;
- iov_iter_init(&i, iov, nr_segs, count, written);
- written_buffered = fuse_perform_write(file, mapping, &i, pos);
+ written_buffered = fuse_perform_write(file, mapping, from, pos);
if (written_buffered < 0) {
err = written_buffered;
goto out;
@@ -1174,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
} else {
- iov_iter_init(&i, iov, nr_segs, count, 0);
- written = fuse_perform_write(file, mapping, &i, pos);
+ written = fuse_perform_write(file, mapping, from, pos);
if (written >= 0)
iocb->ki_pos = pos + written;
}
@@ -1213,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t nbytes = 0; /* # bytes already packed in req */
/* Special case for kernel I/O: can copy directly into the buffer */
- if (segment_eq(get_fs(), KERNEL_DS)) {
+ if (ii->type & ITER_KVEC) {
unsigned long user_addr = fuse_get_user_addr(ii);
size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
@@ -1229,35 +1302,26 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
unsigned npages;
- unsigned long user_addr = fuse_get_user_addr(ii);
- unsigned offset = user_addr & ~PAGE_MASK;
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
- int ret;
-
+ size_t start;
unsigned n = req->max_pages - req->num_pages;
- frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
-
- npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- npages = clamp(npages, 1U, n);
-
- ret = get_user_pages_fast(user_addr, npages, !write,
- &req->pages[req->num_pages]);
+ ssize_t ret = iov_iter_get_pages(ii,
+ &req->pages[req->num_pages],
+ n * PAGE_SIZE, &start);
if (ret < 0)
return ret;
- npages = ret;
- frag_size = min_t(size_t, frag_size,
- (npages << PAGE_SHIFT) - offset);
- iov_iter_advance(ii, frag_size);
+ iov_iter_advance(ii, ret);
+ nbytes += ret;
+
+ ret += start;
+ npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
- req->page_descs[req->num_pages].offset = offset;
+ req->page_descs[req->num_pages].offset = start;
fuse_page_descs_length_init(req, req->num_pages, npages);
req->num_pages += npages;
req->page_descs[req->num_pages - 1].length -=
- (npages << PAGE_SHIFT) - offset - frag_size;
-
- nbytes += frag_size;
+ (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
if (write)
@@ -1272,48 +1336,46 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
static inline int fuse_iter_npages(const struct iov_iter *ii_p)
{
- struct iov_iter ii = *ii_p;
- int npages = 0;
-
- while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
- unsigned long user_addr = fuse_get_user_addr(&ii);
- unsigned offset = user_addr & ~PAGE_MASK;
- size_t frag_size = iov_iter_single_seg_count(&ii);
-
- npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- iov_iter_advance(&ii, frag_size);
- }
-
- return min(npages, FUSE_MAX_PAGES_PER_REQ);
+ return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
}
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
- unsigned long nr_segs, size_t count, loff_t *ppos,
- int write)
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ loff_t *ppos, int flags)
{
+ int write = flags & FUSE_DIO_WRITE;
+ int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->file;
+ struct inode *inode = file->f_mapping->host;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
+ size_t count = iov_iter_count(iter);
+ pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
ssize_t res = 0;
struct fuse_req *req;
- struct iov_iter ii;
-
- iov_iter_init(&ii, iov, nr_segs, count, 0);
if (io->async)
- req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+ req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
else
- req = fuse_get_req(fc, fuse_iter_npages(&ii));
+ req = fuse_get_req(fc, fuse_iter_npages(iter));
if (IS_ERR(req))
return PTR_ERR(req);
+ if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+ if (!write)
+ mutex_lock(&inode->i_mutex);
+ fuse_sync_writes(inode);
+ if (!write)
+ mutex_unlock(&inode->i_mutex);
+ }
+
while (count) {
size_t nres;
fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
- int err = fuse_get_user_pages(req, &ii, &nbytes, write);
+ int err = fuse_get_user_pages(req, iter, &nbytes, write);
if (err) {
res = err;
break;
@@ -1343,9 +1405,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
fuse_put_request(fc, req);
if (io->async)
req = fuse_get_req_for_background(fc,
- fuse_iter_npages(&ii));
+ fuse_iter_npages(iter));
else
- req = fuse_get_req(fc, fuse_iter_npages(&ii));
+ req = fuse_get_req(fc, fuse_iter_npages(iter));
if (IS_ERR(req))
break;
}
@@ -1360,9 +1422,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
EXPORT_SYMBOL_GPL(fuse_direct_io);
static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos,
- size_t count)
+ struct iov_iter *iter,
+ loff_t *ppos)
{
ssize_t res;
struct file *file = io->file;
@@ -1371,7 +1432,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
if (is_bad_inode(inode))
return -EIO;
- res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0);
+ res = fuse_direct_io(io, iter, ppos, 0);
fuse_invalidate_attr(inode);
@@ -1383,21 +1444,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
{
struct fuse_io_priv io = { .async = 0, .file = file };
struct iovec iov = { .iov_base = buf, .iov_len = count };
- return __fuse_direct_read(&io, &iov, 1, ppos, count);
+ struct iov_iter ii;
+ iov_iter_init(&ii, READ, &iov, 1, count);
+ return __fuse_direct_read(&io, &ii, ppos);
}
static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ struct iov_iter *iter,
+ loff_t *ppos)
{
struct file *file = io->file;
struct inode *inode = file_inode(file);
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(iter);
ssize_t res;
+
res = generic_write_checks(file, ppos, &count, 0);
- if (!res)
- res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1);
+ if (!res) {
+ iov_iter_truncate(iter, count);
+ res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
+ }
fuse_invalidate_attr(inode);
@@ -1411,13 +1477,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
struct inode *inode = file_inode(file);
ssize_t res;
struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, WRITE, &iov, 1, count);
if (is_bad_inode(inode))
return -EIO;
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
- res = __fuse_direct_write(&io, &iov, 1, ppos);
+ res = __fuse_direct_write(&io, &ii, ppos);
if (res > 0)
fuse_write_update_size(inode, *ppos);
mutex_unlock(&inode->i_mutex);
@@ -1556,13 +1624,13 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
fuse_writepage_free(fc, req);
}
-static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
- struct fuse_inode *fi)
+static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
+ struct fuse_inode *fi)
{
struct fuse_file *ff = NULL;
spin_lock(&fc->lock);
- if (!WARN_ON(list_empty(&fi->write_files))) {
+ if (!list_empty(&fi->write_files)) {
ff = list_entry(fi->write_files.next, struct fuse_file,
write_entry);
fuse_file_get(ff);
@@ -1572,6 +1640,29 @@ static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
return ff;
}
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+ struct fuse_inode *fi)
+{
+ struct fuse_file *ff = __fuse_write_file_get(fc, fi);
+ WARN_ON(!ff);
+ return ff;
+}
+
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff;
+ int err;
+
+ ff = __fuse_write_file_get(fc, fi);
+ err = fuse_flush_times(inode, ff);
+ if (ff)
+ fuse_file_put(ff, 0);
+
+ return err;
+}
+
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -1596,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
error = -EIO;
req->ff = fuse_write_file_get(fc, fi);
if (!req->ff)
- goto err_free;
+ goto err_nofile;
fuse_write_fill(req, req->ff, page_offset(page), 0);
@@ -1624,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
return 0;
+err_nofile:
+ __free_page(tmp_page);
err_free:
fuse_request_free(req);
err:
@@ -1864,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
- data.orig_pages = kzalloc(sizeof(struct page *) *
- FUSE_MAX_PAGES_PER_REQ,
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
goto out;
@@ -1885,6 +1978,77 @@ out:
return err;
}
+/*
+ * It is worthwhile to make sure that space is reserved on disk for the write,
+ * but how to implement that without killing performance needs more thought.
+ */
+static int fuse_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
+ struct page *page;
+ loff_t fsize;
+ int err = -ENOMEM;
+
+ WARN_ON(!fc->writeback_cache);
+
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ goto error;
+
+ fuse_wait_on_page_writeback(mapping->host, page->index);
+
+ if (PageUptodate(page) || len == PAGE_CACHE_SIZE)
+ goto success;
+ /*
+ * Check if the start of this page comes after the end of file, in which
+ * case the readpage can be optimized away.
+ */
+ fsize = i_size_read(mapping->host);
+ if (fsize <= (pos & PAGE_CACHE_MASK)) {
+ size_t off = pos & ~PAGE_CACHE_MASK;
+ if (off)
+ zero_user_segment(page, 0, off);
+ goto success;
+ }
+ err = fuse_do_readpage(file, page);
+ if (err)
+ goto cleanup;
+success:
+ *pagep = page;
+ return 0;
+
+cleanup:
+ unlock_page(page);
+ page_cache_release(page);
+error:
+ return err;
+}
+
+static int fuse_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+
+ if (!PageUptodate(page)) {
+ /* Zero any unwritten bytes at the end of the page */
+ size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
+ if (endoff)
+ zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ }
+
+ fuse_write_update_size(inode, pos + copied);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return copied;
+}
+
static int fuse_launder_page(struct page *page)
{
int err = 0;
@@ -1940,26 +2104,16 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
static const struct vm_operations_struct fuse_file_vm_ops = {
.close = fuse_vma_close,
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = fuse_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
- struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_file *ff = file->private_data;
- /*
- * file may be written through mmap, so chain it onto the
- * inodes's write_file list
- */
- spin_lock(&fc->lock);
- if (list_empty(&ff->write_entry))
- list_add(&ff->write_entry, &fi->write_files);
- spin_unlock(&fc->lock);
- }
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ fuse_link_write_file(file);
+
file_accessed(file);
vma->vm_ops = &fuse_file_vm_ops;
return 0;
@@ -2117,7 +2271,6 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
struct fuse_file *ff = file->private_data;
/* emulate flock with POSIX locks */
- fl->fl_owner = (fl_owner_t) file;
ff->flock = true;
err = fuse_setlk(file, fl, 1);
}
@@ -2188,7 +2341,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
if (!bytes)
return 0;
- iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+ iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
while (iov_iter_count(&ii)) {
struct page *page = pages[page_idx++];
@@ -2606,7 +2759,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
{
spin_lock(&fc->lock);
if (RB_EMPTY_NODE(&ff->polled_node)) {
- struct rb_node **link, *parent;
+ struct rb_node **link, *uninitialized_var(parent);
link = fuse_find_polled_node(fc, ff->kh, &parent);
BUG_ON(*link);
@@ -2710,8 +2863,8 @@ static inline loff_t fuse_round_up(loff_t off)
}
static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
ssize_t ret = 0;
struct file *file = iocb->ki_filp;
@@ -2720,7 +2873,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t pos = 0;
struct inode *inode;
loff_t i_size;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(iter);
struct fuse_io_priv *io;
pos = offset;
@@ -2735,6 +2888,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
if (offset >= i_size)
return 0;
count = min_t(loff_t, count, fuse_round_up(i_size - offset));
+ iov_iter_truncate(iter, count);
}
io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2764,9 +2918,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
io->async = false;
if (rw == WRITE)
- ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+ ret = __fuse_direct_write(io, iter, &pos);
else
- ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+ ret = __fuse_direct_read(io, iter, &pos);
if (io->async) {
fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -2808,6 +2962,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
(mode & FALLOC_FL_PUNCH_HOLE);
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
if (fc->no_fallocate)
return -EOPNOTSUPP;
@@ -2850,8 +3007,12 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out;
/* we could have extended the file */
- if (!(mode & FALLOC_FL_KEEP_SIZE))
- fuse_write_update_size(inode, offset + length);
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+ bool changed = fuse_write_update_size(inode, offset + length);
+
+ if (changed && fc->writeback_cache)
+ file_update_time(file);
+ }
if (mode & FALLOC_FL_PUNCH_HOLE)
truncate_pagecache_range(inode, offset, offset + length - 1);
@@ -2870,10 +3031,10 @@ out:
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
- .read = do_sync_read,
- .aio_read = fuse_file_aio_read,
- .write = do_sync_write,
- .aio_write = fuse_file_aio_write,
+ .read = new_sync_read,
+ .read_iter = fuse_file_read_iter,
+ .write = new_sync_write,
+ .write_iter = fuse_file_write_iter,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
@@ -2915,6 +3076,8 @@ static const struct address_space_operations fuse_file_aops = {
.set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
+ .write_begin = fuse_write_begin,
+ .write_end = fuse_write_end,
};
void fuse_init_file_inode(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 2da5db2c8bd..e8e47a6ab51 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -480,6 +480,9 @@ struct fuse_conn {
/** Set if bdi is valid */
unsigned bdi_initialized:1;
+ /** write-back cache policy (default is write-through) */
+ unsigned writeback_cache:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -539,6 +542,9 @@ struct fuse_conn {
/** Is fallocate not implemented by fs? */
unsigned no_fallocate:1;
+ /** Is rename with flags not implemented by fs? */
+ unsigned no_rename2:1;
+
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
@@ -720,7 +726,7 @@ int fuse_dev_init(void);
void fuse_dev_cleanup(void);
int fuse_ctl_init(void);
-void fuse_ctl_cleanup(void);
+void __exit fuse_ctl_cleanup(void);
/**
* Allocate a request
@@ -863,9 +869,19 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
- unsigned long nr_segs, size_t count, loff_t *ppos,
- int write);
+
+/**
+ * fuse_direct_io() flags
+ */
+
+/** If set, it is WRITE; otherwise - READ */
+#define FUSE_DIO_WRITE (1 << 0)
+
+/** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */
+#define FUSE_DIO_CUSE (1 << 1)
+
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ loff_t *ppos, int flags);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags);
long fuse_ioctl_common(struct file *file, unsigned int cmd,
@@ -873,7 +889,10 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file);
-void fuse_write_update_size(struct inode *inode, loff_t pos);
+bool fuse_write_update_size(struct inode *inode, loff_t pos);
+
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct file *file);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d468643a68b..03246cd9d47 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -123,7 +123,7 @@ static void fuse_destroy_inode(struct inode *inode)
static void fuse_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (inode->i_sb->s_flags & MS_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode)
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
if (*flags & MS_MANDLOCK)
return -EINVAL;
@@ -170,10 +171,13 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
- inode->i_mtime.tv_sec = attr->mtime;
- inode->i_mtime.tv_nsec = attr->mtimensec;
- inode->i_ctime.tv_sec = attr->ctime;
- inode->i_ctime.tv_nsec = attr->ctimensec;
+ /* mtime from server may be stale due to local buffered write */
+ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+ }
if (attr->blksize != 0)
inode->i_blkbits = ilog2(attr->blksize);
@@ -197,6 +201,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ bool is_wb = fc->writeback_cache;
loff_t oldsize;
struct timespec old_mtime;
@@ -211,10 +216,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
- i_size_write(inode, attr->size);
+ /*
+ * When writeback_cache is enabled, cached writes beyond EOF extend the
+ * local i_size without keeping the userspace server in sync, so the
+ * attr->size coming from the server can be stale and cannot be trusted.
+ */
+ if (!is_wb || !S_ISREG(inode->i_mode))
+ i_size_write(inode, attr->size);
spin_unlock(&fc->lock);
- if (S_ISREG(inode->i_mode)) {
+ if (!is_wb && S_ISREG(inode->i_mode)) {
bool inval = false;
if (oldsize != attr->size) {
@@ -243,6 +254,10 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
if (S_ISREG(inode->i_mode)) {
fuse_init_common(inode);
fuse_init_file_inode(inode);
@@ -289,7 +304,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return NULL;
if ((inode->i_state & I_NEW)) {
- inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_flags |= S_NOATIME;
+ if (!fc->writeback_cache || !S_ISREG(attr->mode))
+ inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation;
inode->i_data.backing_dev_info = &fc->bdi;
fuse_init_inode(inode, attr);
@@ -461,6 +478,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -471,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -494,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), value);
+ d->user_id = make_kuid(current_user_ns(), uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), value);
+ d->group_id = make_kgid(current_user_ns(), uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
@@ -773,6 +802,7 @@ static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
.evict_inode = fuse_evict_inode,
+ .write_inode = fuse_write_inode,
.drop_inode = generic_delete_inode,
.remount_fs = fuse_remount_fs,
.put_super = fuse_put_super,
@@ -873,6 +903,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
}
if (arg->flags & FUSE_ASYNC_DIO)
fc->async_dio = 1;
+ if (arg->flags & FUSE_WRITEBACK_CACHE)
+ fc->writeback_cache = 1;
+ if (arg->time_gran && arg->time_gran <= 1000000000)
+ fc->sb->s_time_gran = arg->time_gran;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -900,7 +934,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
- FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO;
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -978,9 +1013,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_flags & MS_MANDLOCK)
goto err;
- sb->s_flags &= ~MS_NOSEC;
+ sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
- if (!parse_fuse_opt((char *) data, &d, is_bdev))
+ if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
if (is_bdev) {