aboutsummaryrefslogtreecommitdiff
path: root/fs/fuse
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse')
-rw-r--r--fs/fuse/Kconfig16
-rw-r--r--fs/fuse/control.c21
-rw-r--r--fs/fuse/cuse.c95
-rw-r--r--fs/fuse/dev.c559
-rw-r--r--fs/fuse/dir.c944
-rw-r--r--fs/fuse/file.c1713
-rw-r--r--fs/fuse/fuse_i.h216
-rw-r--r--fs/fuse/inode.c314
8 files changed, 2997 insertions, 881 deletions
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94ed..1b2f6c2c3aa 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
With FUSE it is possible to implement a fully functional filesystem
in a userspace program.
- There's also companion library: libfuse. This library along with
- utilities is available from the FUSE homepage:
+ There's also a companion library: libfuse2. This library is available
+ from the FUSE homepage:
<http://fuse.sourceforge.net/>
+ although chances are your distribution already has that library
+ installed if you've installed the "fuse" package itself.
See <file:Documentation/filesystems/fuse.txt> for more information.
See <file:Documentation/Changes> for needed library/utility version.
If you want to develop a userspace FS, or if you want to use
a filesystem based on FUSE, answer Y or M.
+
+config CUSE
+ tristate "Character device in Userspace support"
+ depends on FUSE_FS
+ help
+ This FUSE extension allows character devices to be
+ implemented in userspace.
+
+ If you want to develop or use a userspace character device
+ based on CUSE, answer Y or M.
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 85542a7daf4..205e0d5d530 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
{
struct fuse_conn *fc;
mutex_lock(&fuse_mutex);
- fc = file->f_path.dentry->d_inode->i_private;
+ fc = file_inode(file)->i_private;
if (fc)
fc = fuse_conn_get(fc);
mutex_unlock(&fuse_mutex);
@@ -75,19 +75,13 @@ static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
unsigned global_limit)
{
unsigned long t;
- char tmp[32];
unsigned limit = (1 << 16) - 1;
int err;
- if (*ppos || count >= sizeof(tmp) - 1)
- return -EINVAL;
-
- if (copy_from_user(tmp, buf, count))
+ if (*ppos)
return -EINVAL;
- tmp[count] = '\0';
-
- err = strict_strtoul(tmp, 0, &t);
+ err = kstrtoul_from_user(buf, count, 0, &t);
if (err)
return err;
@@ -123,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned val;
+ unsigned uninitialized_var(val);
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -160,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned val;
+ unsigned uninitialized_var(val);
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -231,7 +225,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
if (iop)
inode->i_op = iop;
inode->i_fop = fop;
- inode->i_nlink = nlink;
+ set_nlink(inode, nlink);
inode->i_private = fc;
d_add(dentry, inode);
return dentry;
@@ -347,13 +341,14 @@ static struct file_system_type fuse_ctl_fs_type = {
.mount = fuse_ctl_mount,
.kill_sb = fuse_ctl_kill_sb,
};
+MODULE_ALIAS_FS("fusectl");
int __init fuse_ctl_init(void)
{
return register_filesystem(&fuse_ctl_fs_type);
}
-void fuse_ctl_cleanup(void)
+void __exit fuse_ctl_cleanup(void)
{
unregister_filesystem(&fuse_ctl_fs_type);
}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3e87cce5837..966ace8b243 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,6 +38,7 @@
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/aio.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
@@ -45,8 +46,8 @@
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/stat.h>
+#include <linux/module.h>
#include "fuse_i.h"
@@ -62,7 +63,7 @@ struct cuse_conn {
bool unrestricted_ioctl;
};
-static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */
+static DEFINE_MUTEX(cuse_lock); /* protects registration */
static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
static struct class *cuse_class;
@@ -91,19 +92,29 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
loff_t pos = 0;
+ struct iovec iov = { .iov_base = buf, .iov_len = count };
+ struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, READ, &iov, 1, count);
- return fuse_direct_io(file, buf, count, &pos, 0);
+ return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
}
static ssize_t cuse_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
loff_t pos = 0;
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
+ struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, WRITE, &iov, 1, count);
+
/*
* No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks.
*/
- return fuse_direct_io(file, buf, count, &pos, 1);
+ return fuse_direct_io(&io, &ii, &pos,
+ FUSE_DIO_WRITE | FUSE_DIO_CUSE);
}
static int cuse_open(struct inode *inode, struct file *file)
@@ -113,14 +124,14 @@ static int cuse_open(struct inode *inode, struct file *file)
int rc;
/* look up and get the connection */
- spin_lock(&cuse_lock);
+ mutex_lock(&cuse_lock);
list_for_each_entry(pos, cuse_conntbl_head(devt), list)
if (pos->dev->devt == devt) {
fuse_conn_get(&pos->fc);
cc = pos;
break;
}
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* dead? */
if (!cc)
@@ -266,7 +277,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
{
char *end = p + len;
- char *key, *val;
+ char *uninitialized_var(key), *uninitialized_var(val);
int rc;
while (true) {
@@ -304,14 +315,14 @@ static void cuse_gendev_release(struct device *dev)
*/
static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
- struct cuse_conn *cc = fc_to_cc(fc);
- struct cuse_init_out *arg = &req->misc.cuse_init_out;
+ struct cuse_conn *cc = fc_to_cc(fc), *pos;
+ struct cuse_init_out *arg = req->out.args[0].value;
struct page *page = req->pages[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
dev_t devt;
- int rc;
+ int rc, i;
if (req->out.h.error ||
arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -355,15 +366,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
dev_set_drvdata(dev, cc);
dev_set_name(dev, "%s", devinfo.name);
+ mutex_lock(&cuse_lock);
+
+ /* make sure the device-name is unique */
+ for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
+ list_for_each_entry(pos, &cuse_conntbl[i], list)
+ if (!strcmp(dev_name(pos->dev), dev_name(dev)))
+ goto err_unlock;
+ }
+
rc = device_add(dev);
if (rc)
- goto err_device;
+ goto err_unlock;
/* register cdev */
rc = -ENOMEM;
cdev = cdev_alloc();
if (!cdev)
- goto err_device;
+ goto err_unlock;
cdev->owner = THIS_MODULE;
cdev->ops = &cuse_frontend_fops;
@@ -376,25 +396,26 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
cc->cdev = cdev;
/* make the device available */
- spin_lock(&cuse_lock);
list_add(&cc->list, cuse_conntbl_head(devt));
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* announce device availability */
dev_set_uevent_suppress(dev, 0);
kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
+ kfree(arg);
__free_page(page);
return;
err_cdev:
cdev_del(cdev);
-err_device:
+err_unlock:
+ mutex_unlock(&cuse_lock);
put_device(dev);
err_region:
unregister_chrdev_region(devt, 1);
err:
- fc->conn_error = 1;
+ fuse_conn_kill(fc);
goto out;
}
@@ -405,10 +426,11 @@ static int cuse_send_init(struct cuse_conn *cc)
struct page *page;
struct fuse_conn *fc = &cc->fc;
struct cuse_init_in *arg;
+ void *outarg;
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
- req = fuse_get_req(fc);
+ req = fuse_get_req_for_background(fc, 1);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
goto err;
@@ -419,6 +441,10 @@ static int cuse_send_init(struct cuse_conn *cc)
if (!page)
goto err_put_req;
+ outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
+ if (!outarg)
+ goto err_free_page;
+
arg = &req->misc.cuse_init_in;
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
@@ -429,17 +455,20 @@ static int cuse_send_init(struct cuse_conn *cc)
req->in.args[0].value = arg;
req->out.numargs = 2;
req->out.args[0].size = sizeof(struct cuse_init_out);
- req->out.args[0].value = &req->misc.cuse_init_out;
+ req->out.args[0].value = outarg;
req->out.args[1].size = CUSE_INIT_INFO_MAX;
req->out.argvar = 1;
req->out.argpages = 1;
req->pages[0] = page;
+ req->page_descs[0].length = req->out.args[1].size;
req->num_pages = 1;
req->end = cuse_process_init_reply;
fuse_request_send_background(fc, req);
return 0;
+err_free_page:
+ __free_page(page);
err_put_req:
fuse_put_request(fc, req);
err:
@@ -449,7 +478,7 @@ err:
static void cuse_fc_release(struct fuse_conn *fc)
{
struct cuse_conn *cc = fc_to_cc(fc);
- kfree(cc);
+ kfree_rcu(cc, fc.rcu);
}
/**
@@ -458,7 +487,7 @@ static void cuse_fc_release(struct fuse_conn *fc)
* @file: file struct being opened
*
* Userland CUSE server can create a CUSE device by opening /dev/cuse
- * and replying to the initilaization request kernel sends. This
+ * and replying to the initialization request kernel sends. This
* function is responsible for handling CUSE device initialization.
* Because the fd opened by this function is used during
* initialization, this function only creates cuse_conn and sends
@@ -483,7 +512,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
cc->fc.release = cuse_fc_release;
cc->fc.connected = 1;
- cc->fc.blocked = 0;
+ cc->fc.initialized = 1;
rc = cuse_send_init(cc);
if (rc) {
fuse_conn_put(&cc->fc);
@@ -511,9 +540,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
int rc;
/* remove from the conntbl, no more access from this point on */
- spin_lock(&cuse_lock);
+ mutex_lock(&cuse_lock);
list_del_init(&cc->list);
- spin_unlock(&cuse_lock);
+ mutex_unlock(&cuse_lock);
/* remove device */
if (cc->dev)
@@ -523,8 +552,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
cdev_del(cc->cdev);
}
- /* kill connection and shutdown channel */
- fuse_conn_kill(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
return rc;
@@ -546,6 +573,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev,
return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
}
+static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL);
static ssize_t cuse_class_abort_store(struct device *dev,
struct device_attribute *attr,
@@ -556,19 +584,24 @@ static ssize_t cuse_class_abort_store(struct device *dev,
fuse_abort_conn(&cc->fc);
return count;
}
+static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
-static struct device_attribute cuse_class_dev_attrs[] = {
- __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
- __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
- { }
+static struct attribute *cuse_class_dev_attrs[] = {
+ &dev_attr_waiting.attr,
+ &dev_attr_abort.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(cuse_class_dev);
static struct miscdevice cuse_miscdev = {
- .minor = MISC_DYNAMIC_MINOR,
+ .minor = CUSE_MINOR,
.name = "cuse",
.fops = &cuse_channel_fops,
};
+MODULE_ALIAS_MISCDEV(CUSE_MINOR);
+MODULE_ALIAS("devname:cuse");
+
static int __init cuse_init(void)
{
int i, rc;
@@ -587,7 +620,7 @@ static int __init cuse_init(void)
if (IS_ERR(cuse_class))
return PTR_ERR(cuse_class);
- cuse_class->dev_attrs = cuse_class_dev_attrs;
+ cuse_class->dev_groups = cuse_class_dev_groups;
rc = misc_register(&cuse_miscdev);
if (rc) {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6e07696308d..ca887314aba 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,6 +19,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
+#include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -34,34 +35,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
return file->private_data;
}
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_req *req, struct page **pages,
+ struct fuse_page_desc *page_descs,
+ unsigned npages)
{
memset(req, 0, sizeof(*req));
+ memset(pages, 0, sizeof(*pages) * npages);
+ memset(page_descs, 0, sizeof(*page_descs) * npages);
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
atomic_set(&req->count, 1);
+ req->pages = pages;
+ req->page_descs = page_descs;
+ req->max_pages = npages;
}
-struct fuse_req *fuse_request_alloc(void)
+static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
- struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
- if (req)
- fuse_request_init(req);
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
+ if (req) {
+ struct page **pages;
+ struct fuse_page_desc *page_descs;
+
+ if (npages <= FUSE_REQ_INLINE_PAGES) {
+ pages = req->inline_pages;
+ page_descs = req->inline_page_descs;
+ } else {
+ pages = kmalloc(sizeof(struct page *) * npages, flags);
+ page_descs = kmalloc(sizeof(struct fuse_page_desc) *
+ npages, flags);
+ }
+
+ if (!pages || !page_descs) {
+ kfree(pages);
+ kfree(page_descs);
+ kmem_cache_free(fuse_req_cachep, req);
+ return NULL;
+ }
+
+ fuse_request_init(req, pages, page_descs, npages);
+ }
return req;
}
+
+struct fuse_req *fuse_request_alloc(unsigned npages)
+{
+ return __fuse_request_alloc(npages, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(fuse_request_alloc);
-struct fuse_req *fuse_request_alloc_nofs(void)
+struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
- struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
- if (req)
- fuse_request_init(req);
- return req;
+ return __fuse_request_alloc(npages, GFP_NOFS);
}
void fuse_request_free(struct fuse_req *req)
{
+ if (req->pages != req->inline_pages) {
+ kfree(req->pages);
+ kfree(req->page_descs);
+ }
kmem_cache_free(fuse_req_cachep, req);
}
@@ -78,7 +112,7 @@ static void restore_sigs(sigset_t *oldset)
sigprocmask(SIG_SETMASK, oldset, NULL);
}
-static void __fuse_get_request(struct fuse_req *req)
+void __fuse_get_request(struct fuse_req *req)
{
atomic_inc(&req->count);
}
@@ -92,45 +126,71 @@ static void __fuse_put_request(struct fuse_req *req)
static void fuse_req_init_context(struct fuse_req *req)
{
- req->in.h.uid = current_fsuid();
- req->in.h.gid = current_fsgid();
+ req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
+ req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
req->in.h.pid = current->pid;
}
-struct fuse_req *fuse_get_req(struct fuse_conn *fc)
+static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
+{
+ return !fc->initialized || (for_background && fc->blocked);
+}
+
+static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
+ bool for_background)
{
struct fuse_req *req;
- sigset_t oldset;
- int intr;
int err;
-
atomic_inc(&fc->num_waiting);
- block_sigs(&oldset);
- intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
- restore_sigs(&oldset);
- err = -EINTR;
- if (intr)
- goto out;
+
+ if (fuse_block_alloc(fc, for_background)) {
+ sigset_t oldset;
+ int intr;
+
+ block_sigs(&oldset);
+ intr = wait_event_interruptible_exclusive(fc->blocked_waitq,
+ !fuse_block_alloc(fc, for_background));
+ restore_sigs(&oldset);
+ err = -EINTR;
+ if (intr)
+ goto out;
+ }
err = -ENOTCONN;
if (!fc->connected)
goto out;
- req = fuse_request_alloc();
+ req = fuse_request_alloc(npages);
err = -ENOMEM;
- if (!req)
+ if (!req) {
+ if (for_background)
+ wake_up(&fc->blocked_waitq);
goto out;
+ }
fuse_req_init_context(req);
req->waiting = 1;
+ req->background = for_background;
return req;
out:
atomic_dec(&fc->num_waiting);
return ERR_PTR(err);
}
+
+struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
+{
+ return __fuse_get_req(fc, npages, false);
+}
EXPORT_SYMBOL_GPL(fuse_get_req);
+struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
+ unsigned npages)
+{
+ return __fuse_get_req(fc, npages, true);
+}
+EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
+
/*
* Return request in fuse_file->reserved_req. However that may
* currently be in use. If that is the case, wait for it to become
@@ -148,8 +208,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
if (ff->reserved_req) {
req = ff->reserved_req;
ff->reserved_req = NULL;
- get_file(file);
- req->stolen_file = file;
+ req->stolen_file = get_file(file);
}
spin_unlock(&fc->lock);
} while (!req);
@@ -166,7 +225,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
struct fuse_file *ff = file->private_data;
spin_lock(&fc->lock);
- fuse_request_init(req);
+ fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
BUG_ON(ff->reserved_req);
ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq);
@@ -187,24 +246,37 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
* filesystem should not have it's own file open. If deadlock is
* intentional, it can still be broken by "aborting" the filesystem.
*/
-struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
+struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
+ struct file *file)
{
struct fuse_req *req;
atomic_inc(&fc->num_waiting);
- wait_event(fc->blocked_waitq, !fc->blocked);
- req = fuse_request_alloc();
+ wait_event(fc->blocked_waitq, fc->initialized);
+ req = fuse_request_alloc(0);
if (!req)
req = get_reserved_req(fc, file);
fuse_req_init_context(req);
req->waiting = 1;
+ req->background = 0;
return req;
}
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
if (atomic_dec_and_test(&req->count)) {
+ if (unlikely(req->background)) {
+ /*
+ * We get here in the unlikely case that a background
+ * request was allocated but not sent
+ */
+ spin_lock(&fc->lock);
+ if (!fc->blocked)
+ wake_up(&fc->blocked_waitq);
+ spin_unlock(&fc->lock);
+ }
+
if (req->waiting)
atomic_dec(&fc->num_waiting);
@@ -251,6 +323,24 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup)
+{
+ forget->forget_one.nodeid = nodeid;
+ forget->forget_one.nlookup = nlookup;
+
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ fc->forget_list_tail->next = forget;
+ fc->forget_list_tail = forget;
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ } else {
+ kfree(forget);
+ }
+ spin_unlock(&fc->lock);
+}
+
static void flush_bg_queue(struct fuse_conn *fc)
{
while (fc->active_background < fc->max_background &&
@@ -284,10 +374,15 @@ __releases(fc->lock)
list_del(&req->intr_entry);
req->state = FUSE_REQ_FINISHED;
if (req->background) {
- if (fc->num_background == fc->max_background) {
+ req->background = 0;
+
+ if (fc->num_background == fc->max_background)
fc->blocked = 0;
- wake_up_all(&fc->blocked_waitq);
- }
+
+ /* Wake up next waiter, if any */
+ if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
+ wake_up(&fc->blocked_waitq);
+
if (fc->num_background == fc->congestion_threshold &&
fc->connected && fc->bdi_initialized) {
clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
@@ -389,9 +484,9 @@ __acquires(fc->lock)
}
}
-void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
- req->isreply = 1;
+ BUG_ON(req->background);
spin_lock(&fc->lock);
if (!fc->connected)
req->out.h.error = -ENOTCONN;
@@ -408,12 +503,18 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
spin_unlock(&fc->lock);
}
+
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ __fuse_request_send(fc, req);
+}
EXPORT_SYMBOL_GPL(fuse_request_send);
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
{
- req->background = 1;
+ BUG_ON(!req->background);
fc->num_background++;
if (fc->num_background == fc->max_background)
fc->blocked = 1;
@@ -438,12 +539,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
}
}
-void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
-{
- req->isreply = 0;
- fuse_request_send_nowait(fc, req);
-}
-
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
@@ -480,6 +575,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
fuse_request_send_nowait_locked(fc, req);
}
+void fuse_force_forget(struct file *file, u64 nodeid)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_forget_in inarg;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.nlookup = 1;
+ req = fuse_get_req_nofail_nopages(fc, file);
+ req->in.h.opcode = FUSE_FORGET;
+ req->in.h.nodeid = nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->isreply = 0;
+ __fuse_request_send(fc, req);
+ /* ignore errors */
+ fuse_put_request(fc, req);
+}
+
/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
@@ -527,9 +643,8 @@ struct fuse_copy_state {
unsigned long seglen;
unsigned long addr;
struct page *pg;
- void *mapaddr;
- void *buf;
unsigned len;
+ unsigned offset;
unsigned move_pages:1;
};
@@ -550,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
- if (!cs->write) {
- buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
- } else {
- kunmap(buf->page);
+ if (cs->write)
buf->len = PAGE_SIZE - cs->len;
- }
cs->currbuf = NULL;
- cs->mapaddr = NULL;
- } else if (cs->mapaddr) {
- kunmap(cs->pg);
+ } else if (cs->pg) {
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
}
put_page(cs->pg);
- cs->mapaddr = NULL;
}
+ cs->pg = NULL;
}
/*
@@ -575,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
*/
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
- unsigned long offset;
+ struct page *page;
int err;
unlock_request(cs->fc, cs->req);
@@ -590,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
cs->len = buf->len;
- cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
cs->nr_segs--;
} else {
- struct page *page;
-
if (cs->nr_segs == cs->pipe->buffers)
return -EIO;
@@ -610,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap(page);
- cs->buf = cs->mapaddr;
+ cs->pg = page;
+ cs->offset = 0;
cs->len = PAGE_SIZE;
cs->pipebufs++;
cs->nr_segs++;
@@ -624,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->iov++;
cs->nr_segs--;
}
- err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
if (err < 0)
return err;
BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap(cs->pg);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->pg = page;
+ cs->offset = cs->addr % PAGE_SIZE;
+ cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
cs->seglen -= cs->len;
cs->addr += cs->len;
}
@@ -644,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
unsigned ncpy = min(*size, cs->len);
if (val) {
+ void *pgaddr = kmap_atomic(cs->pg);
+ void *buf = pgaddr + cs->offset;
+
if (cs->write)
- memcpy(cs->buf, *val, ncpy);
+ memcpy(buf, *val, ncpy);
else
- memcpy(*val, cs->buf, ncpy);
+ memcpy(*val, buf, ncpy);
+
+ kunmap_atomic(pgaddr);
*val += ncpy;
}
*size -= ncpy;
cs->len -= ncpy;
- cs->buf += ncpy;
+ cs->offset += ncpy;
return ncpy;
}
@@ -681,8 +792,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *oldpage = *pagep;
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;
- struct address_space *mapping;
- pgoff_t index;
unlock_request(cs->fc, cs->req);
fuse_copy_finish(cs);
@@ -713,9 +822,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (fuse_check_page(newpage) != 0)
goto out_fallback_unlock;
- mapping = oldpage->mapping;
- index = oldpage->index;
-
/*
* This is a new and locked page, it shouldn't be mapped or
* have any special flags on it
@@ -729,14 +835,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (WARN_ON(PageMlocked(oldpage)))
goto out_fallback_unlock;
- remove_from_page_cache(oldpage);
- page_cache_release(oldpage);
-
- err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
+ err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) {
- printk(KERN_WARNING "fuse_try_move_page: failed to add page");
- goto out_fallback_unlock;
+ unlock_page(newpage);
+ return err;
}
+
page_cache_get(newpage);
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
@@ -765,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
out_fallback_unlock:
unlock_page(newpage);
out_fallback:
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
- cs->buf = cs->mapaddr + buf->offset;
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
err = lock_request(cs->fc, cs->req);
if (err)
@@ -828,10 +932,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
}
}
if (page) {
- void *mapaddr = kmap_atomic(page, KM_USER0);
+ void *mapaddr = kmap_atomic(page);
void *buf = mapaddr + offset;
offset += fuse_copy_do(cs, &buf, &count);
- kunmap_atomic(mapaddr, KM_USER0);
+ kunmap_atomic(mapaddr);
} else
offset += fuse_copy_do(cs, NULL, &count);
}
@@ -846,11 +950,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
{
unsigned i;
struct fuse_req *req = cs->req;
- unsigned offset = req->page_offset;
- unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
int err;
+ unsigned offset = req->page_descs[i].offset;
+ unsigned count = min(nbytes, req->page_descs[i].length);
err = fuse_copy_page(cs, &req->pages[i], offset, count,
zeroing);
@@ -858,8 +962,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
return err;
nbytes -= count;
- count = min(nbytes, (unsigned) PAGE_SIZE);
- offset = 0;
}
return 0;
}
@@ -896,9 +998,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
return err;
}
+static int forget_pending(struct fuse_conn *fc)
+{
+ return fc->forget_list_head.next != NULL;
+}
+
static int request_pending(struct fuse_conn *fc)
{
- return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
+ return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
+ forget_pending(fc);
}
/* Wait until a request is available on the pending list */
@@ -960,6 +1068,120 @@ __releases(fc->lock)
return err ? err : reqsize;
}
+static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
+ unsigned max,
+ unsigned *countp)
+{
+ struct fuse_forget_link *head = fc->forget_list_head.next;
+ struct fuse_forget_link **newhead = &head;
+ unsigned count;
+
+ for (count = 0; *newhead != NULL && count < max; count++)
+ newhead = &(*newhead)->next;
+
+ fc->forget_list_head.next = *newhead;
+ *newhead = NULL;
+ if (fc->forget_list_head.next == NULL)
+ fc->forget_list_tail = &fc->forget_list_head;
+
+ if (countp != NULL)
+ *countp = count;
+
+ return head;
+}
+
+static int fuse_read_single_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
+ struct fuse_forget_in arg = {
+ .nlookup = forget->forget_one.nlookup,
+ };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_FORGET,
+ .nodeid = forget->forget_one.nodeid,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ spin_unlock(&fc->lock);
+ kfree(forget);
+ if (nbytes < ih.len)
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_batch_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ unsigned max_forgets;
+ unsigned count;
+ struct fuse_forget_link *head;
+ struct fuse_batch_forget_in arg = { .count = 0 };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_BATCH_FORGET,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ if (nbytes < ih.len) {
+ spin_unlock(&fc->lock);
+ return -EINVAL;
+ }
+
+ max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
+ head = dequeue_forget(fc, max_forgets, &count);
+ spin_unlock(&fc->lock);
+
+ arg.count = count;
+ ih.len += count * sizeof(struct fuse_forget_one);
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+
+ while (head) {
+ struct fuse_forget_link *forget = head;
+
+ if (!err) {
+ err = fuse_copy_one(cs, &forget->forget_one,
+ sizeof(forget->forget_one));
+ }
+ head = forget->next;
+ kfree(forget);
+ }
+
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
+ return fuse_read_single_forget(fc, cs, nbytes);
+ else
+ return fuse_read_batch_forget(fc, cs, nbytes);
+}
+
/*
* Read a single request into the userspace filesystem's buffer. This
* function waits until a request is available, then removes it from
@@ -998,6 +1220,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
return fuse_read_interrupt(fc, cs, nbytes, req);
}
+ if (forget_pending(fc)) {
+ if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
+ return fuse_read_forget(fc, cs, nbytes);
+
+ if (fc->forget_batch <= -8)
+ fc->forget_batch = 16;
+ }
+
req = list_entry(fc->pending.next, struct fuse_req, list);
req->state = FUSE_REQ_READING;
list_move(&req->list, &fc->io);
@@ -1061,22 +1291,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
-static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = fuse_dev_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
@@ -1090,7 +1304,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1123,13 +1337,17 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
buf->page = bufs[page_nr].page;
buf->offset = bufs[page_nr].offset;
buf->len = bufs[page_nr].len;
- buf->ops = &fuse_dev_pipe_buf_ops;
+ /*
+ * Need to be careful about this. Having buf->ops in module
+ * code can Oops if the buffer persists after module unload.
+ */
+ buf->ops = &nosteal_pipe_buf_ops;
pipe->nrbufs++;
page_nr++;
ret += buf->len;
- if (pipe->inode)
+ if (pipe->files)
do_wakeup = 1;
}
@@ -1224,6 +1442,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
if (outarg.namelen > FUSE_NAME_MAX)
goto err;
+ err = -EINVAL;
+ if (size != sizeof(outarg) + outarg.namelen + 1)
+ goto err;
+
name.name = buf;
name.len = outarg.namelen;
err = fuse_copy_one(cs, buf, outarg.namelen + 1);
@@ -1236,7 +1458,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = -ENOENT;
if (fc->sb)
- err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+ err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+ up_read(&fc->killsb);
+ kfree(buf);
+ return err;
+
+err:
+ kfree(buf);
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_delete_out outarg;
+ int err = -ENOMEM;
+ char *buf;
+ struct qstr name;
+
+ buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
+ if (!buf)
+ goto err;
+
+ err = -EINVAL;
+ if (size < sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+
+ err = -ENAMETOOLONG;
+ if (outarg.namelen > FUSE_NAME_MAX)
+ goto err;
+
+ err = -EINVAL;
+ if (size != sizeof(outarg) + outarg.namelen + 1)
+ goto err;
+
+ name.name = buf;
+ name.len = outarg.namelen;
+ err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+ if (err)
+ goto err;
+ fuse_copy_finish(cs);
+ buf[outarg.namelen] = 0;
+ name.hash = full_name_hash(name.name, name.len);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb)
+ err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
+ outarg.child, &name);
up_read(&fc->killsb);
kfree(buf);
return err;
@@ -1308,7 +1582,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!err && offset == 0 && (num != 0 || file_size == end))
+ if (!err && offset == 0 &&
+ (this_num == PAGE_CACHE_SIZE || file_size == end))
SetPageUptodate(page);
unlock_page(page);
page_cache_release(page);
@@ -1334,7 +1609,7 @@ out_finish:
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
- release_pages(req->pages, req->num_pages, 0);
+ release_pages(req->pages, req->num_pages, false);
}
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
@@ -1348,29 +1623,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
+ int num_pages;
+
+ offset = outarg->offset & ~PAGE_CACHE_MASK;
+ file_size = i_size_read(inode);
- req = fuse_get_req(fc);
+ num = outarg->size;
+ if (outarg->offset > file_size)
+ num = 0;
+ else if (outarg->offset + num > file_size)
+ num = file_size - outarg->offset;
+
+ num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+
+ req = fuse_get_req(fc, num_pages);
if (IS_ERR(req))
return PTR_ERR(req);
- offset = outarg->offset & ~PAGE_CACHE_MASK;
-
req->in.h.opcode = FUSE_NOTIFY_REPLY;
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
- req->page_offset = offset;
+ req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_CACHE_SHIFT;
- file_size = i_size_read(inode);
- num = outarg->size;
- if (outarg->offset > file_size)
- num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
- while (num) {
+ while (num && req->num_pages < num_pages) {
struct page *page;
unsigned int this_num;
@@ -1380,10 +1660,13 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
+ offset = 0;
num -= this_num;
total_len += this_num;
+ index++;
}
req->misc.retrieve_in.offset = outarg->offset;
req->misc.retrieve_in.size = total_len;
@@ -1454,6 +1737,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_RETRIEVE:
return fuse_notify_retrieve(fc, size, cs);
+ case FUSE_NOTIFY_DELETE:
+ return fuse_notify_delete(fc, size, cs);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
@@ -1463,11 +1749,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
- struct list_head *entry;
+ struct fuse_req *req;
- list_for_each(entry, &fc->processing) {
- struct fuse_req *req;
- req = list_entry(entry, struct fuse_req, list);
+ list_for_each_entry(req, &fc->processing, list) {
if (req->in.h.unique == unique || req->intr_unique == unique)
return req;
}
@@ -1626,7 +1910,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1770,6 +2054,23 @@ __acquires(fc->lock)
flush_bg_queue(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
+ while (forget_pending(fc))
+ kfree(dequeue_forget(fc, 1, NULL));
+}
+
+static void end_polls(struct fuse_conn *fc)
+{
+ struct rb_node *p;
+
+ p = rb_first(&fc->polled_files);
+
+ while (p) {
+ struct fuse_file *ff;
+ ff = rb_entry(p, struct fuse_file, polled_node);
+ wake_up_interruptible_all(&ff->poll_wait);
+
+ p = rb_next(p);
+ }
}
/*
@@ -1797,8 +2098,10 @@ void fuse_abort_conn(struct fuse_conn *fc)
if (fc->connected) {
fc->connected = 0;
fc->blocked = 0;
+ fc->initialized = 1;
end_io_requests(fc);
end_queued_requests(fc);
+ end_polls(fc);
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1814,7 +2117,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
spin_lock(&fc->lock);
fc->connected = 0;
fc->blocked = 0;
+ fc->initialized = 1;
end_queued_requests(fc);
+ end_polls(fc);
wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
fuse_conn_put(fc);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482..0c6048247a3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,9 +10,32 @@
#include <linux/pagemap.h>
#include <linux/file.h>
-#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/namei.h>
+#include <linux/slab.h>
+
+static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_inode *fi = get_fuse_inode(dir);
+
+ if (!fc->do_readdirplus)
+ return false;
+ if (!fc->readdirplus_auto)
+ return true;
+ if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
+ return true;
+ if (ctx->pos == 0)
+ return true;
+ return false;
+}
+
+static void fuse_advise_use_readdirplus(struct inode *dir)
+{
+ struct fuse_inode *fi = get_fuse_inode(dir);
+
+ set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
+}
#if BITS_PER_LONG >= 64
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
@@ -89,6 +112,16 @@ void fuse_invalidate_attr(struct inode *inode)
get_fuse_inode(inode)->i_time = 0;
}
+/**
+ * Mark the attributes as stale due to an atime change. Avoid the invalidate if
+ * atime is not used.
+ */
+void fuse_invalidate_atime(struct inode *inode)
+{
+ if (!IS_RDONLY(inode))
+ fuse_invalidate_attr(inode);
+}
+
/*
* Just mark the entry as stale, so that a next attempt to look it up
* will result in a new lookup call to userspace
@@ -154,34 +187,44 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
* the lookup once more. If the lookup results in the same inode,
* then refresh the attributes, timeouts and mark the dentry valid.
*/
-static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
- struct inode *inode = entry->d_inode;
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_conn *fc;
+ struct fuse_inode *fi;
+ int ret;
+ inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
- return 0;
- else if (fuse_dentry_time(entry) < get_jiffies_64()) {
+ goto invalid;
+ else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+ (flags & LOOKUP_REVAL)) {
int err;
struct fuse_entry_out outarg;
- struct fuse_conn *fc;
struct fuse_req *req;
- struct fuse_req *forget_req;
- struct dentry *parent;
+ struct fuse_forget_link *forget;
u64 attr_version;
/* For negative dentries, always do a fresh lookup */
if (!inode)
- return 0;
+ goto invalid;
+
+ ret = -ECHILD;
+ if (flags & LOOKUP_RCU)
+ goto out;
fc = get_fuse_conn(inode);
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
+ ret = PTR_ERR(req);
if (IS_ERR(req))
- return 0;
+ goto out;
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ if (!forget) {
fuse_put_request(fc, req);
- return 0;
+ ret = -ENOMEM;
+ goto out;
}
attr_version = fuse_get_attr_version(fc);
@@ -197,26 +240,44 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (!err && !outarg.nodeid)
err = -ENOENT;
if (!err) {
- struct fuse_inode *fi = get_fuse_inode(inode);
+ fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
- fuse_send_forget(fc, forget_req,
- outarg.nodeid, 1);
- return 0;
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ goto invalid;
}
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
}
- fuse_put_request(fc, forget_req);
+ kfree(forget);
if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
- return 0;
+ goto invalid;
fuse_change_attributes(inode, &outarg.attr,
entry_attr_timeout(&outarg),
attr_version);
fuse_change_entry_timeout(entry, &outarg);
+ } else if (inode) {
+ fi = get_fuse_inode(inode);
+ if (flags & LOOKUP_RCU) {
+ if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
+ return -ECHILD;
+ } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
+ parent = dget_parent(entry);
+ fuse_advise_use_readdirplus(parent->d_inode);
+ dput(parent);
+ }
}
- return 1;
+ ret = 1;
+out:
+ return ret;
+
+invalid:
+ ret = 0;
+
+ if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0)
+ ret = 1;
+ goto out;
}
static int invalid_nodeid(u64 nodeid)
@@ -234,32 +295,12 @@ int fuse_valid_type(int m)
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
-/*
- * Add a directory inode to a dentry, ensuring that no other dentry
- * refers to this inode. Called with fc->inst_mutex.
- */
-static struct dentry *fuse_d_add_directory(struct dentry *entry,
- struct inode *inode)
-{
- struct dentry *alias = d_find_alias(inode);
- if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
- /* This tries to shrink the subtree below alias */
- fuse_invalidate_entry(alias);
- dput(alias);
- if (!list_empty(&inode->i_dentry))
- return ERR_PTR(-EBUSY);
- } else {
- dput(alias);
- }
- return d_splice_alias(inode, entry);
-}
-
int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
u64 attr_version;
int err;
@@ -268,14 +309,14 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (name->len > FUSE_NAME_MAX)
goto out;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out;
- forget_req = fuse_get_req(fc);
- err = PTR_ERR(forget_req);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ err = -ENOMEM;
+ if (!forget) {
fuse_put_request(fc, req);
goto out;
}
@@ -301,25 +342,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
attr_version);
err = -ENOMEM;
if (!*inode) {
- fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+ fuse_queue_forget(fc, forget, outarg->nodeid, 1);
goto out;
}
err = 0;
out_put_forget:
- fuse_put_request(fc, forget_req);
+ kfree(forget);
out:
return err;
}
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
- struct nameidata *nd)
+ unsigned int flags)
{
int err;
struct fuse_entry_out outarg;
struct inode *inode;
struct dentry *newent;
- struct fuse_conn *fc = get_fuse_conn(dir);
bool outarg_valid = true;
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
@@ -335,24 +375,18 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
if (inode && get_node_id(inode) == FUSE_ROOT_ID)
goto out_iput;
- if (inode && S_ISDIR(inode->i_mode)) {
- mutex_lock(&fc->inst_mutex);
- newent = fuse_d_add_directory(entry, inode);
- mutex_unlock(&fc->inst_mutex);
- err = PTR_ERR(newent);
- if (IS_ERR(newent))
- goto out_iput;
- } else {
- newent = d_splice_alias(inode, entry);
- }
+ newent = d_materialise_unique(entry, inode);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_err;
entry = newent ? newent : entry;
- entry->d_op = &fuse_dentry_operations;
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
+ fuse_advise_use_readdirplus(dir);
return newent;
out_iput:
@@ -367,32 +401,29 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
* If the filesystem doesn't support this, then fall back to separate
* 'mknod' + 'open' requests.
*/
-static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
- struct nameidata *nd)
+static int fuse_create_open(struct inode *dir, struct dentry *entry,
+ struct file *file, unsigned flags,
+ umode_t mode, int *opened)
{
int err;
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
struct fuse_entry_out outentry;
struct fuse_file *ff;
- struct file *file;
- int flags = nd->intent.open.flags - 1;
- if (fc->no_create)
- return -ENOSYS;
-
- if (flags & O_DIRECT)
- return -EINVAL;
+ /* Userspace expects S_IFREG in create mode */
+ BUG_ON((mode & S_IFMT) != S_IFREG);
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req))
- return PTR_ERR(forget_req);
+ forget = fuse_alloc_forget();
+ err = -ENOMEM;
+ if (!forget)
+ goto out_err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out_put_forget_req;
@@ -429,11 +460,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
req->out.args[1].value = &outopen;
fuse_request_send(fc, req);
err = req->out.h.error;
- if (err) {
- if (err == -ENOSYS)
- fc->no_create = 1;
+ if (err)
goto out_free_ff;
- }
err = -EIO;
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
@@ -448,47 +476,93 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(ff, flags);
- fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
- return -ENOMEM;
+ fuse_queue_forget(fc, forget, outentry.nodeid, 1);
+ err = -ENOMEM;
+ goto out_err;
}
- fuse_put_request(fc, forget_req);
+ kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
- file = lookup_instantiate_filp(nd, entry, generic_file_open);
- if (IS_ERR(file)) {
+ err = finish_open(file, entry, generic_file_open, opened);
+ if (err) {
fuse_sync_release(ff, flags);
- return PTR_ERR(file);
+ } else {
+ file->private_data = fuse_file_get(ff);
+ fuse_finish_open(inode, file);
}
- file->private_data = fuse_file_get(ff);
- fuse_finish_open(inode, file);
- return 0;
+ return err;
- out_free_ff:
+out_free_ff:
fuse_file_free(ff);
- out_put_request:
+out_put_request:
fuse_put_request(fc, req);
- out_put_forget_req:
- fuse_put_request(fc, forget_req);
+out_put_forget_req:
+ kfree(forget);
+out_err:
return err;
}
+static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+ struct file *file, unsigned flags,
+ umode_t mode, int *opened)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct dentry *res = NULL;
+
+ if (d_unhashed(entry)) {
+ res = fuse_lookup(dir, entry, 0);
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ if (res)
+ entry = res;
+ }
+
+ if (!(flags & O_CREAT) || entry->d_inode)
+ goto no_open;
+
+ /* Only creates */
+ *opened |= FILE_CREATED;
+
+ if (fc->no_create)
+ goto mknod;
+
+ err = fuse_create_open(dir, entry, file, flags, mode, opened);
+ if (err == -ENOSYS) {
+ fc->no_create = 1;
+ goto mknod;
+ }
+out_dput:
+ dput(res);
+ return err;
+
+mknod:
+ err = fuse_mknod(dir, entry, mode, 0);
+ if (err)
+ goto out_dput;
+no_open:
+ return finish_no_open(file, res);
+}
+
/*
* Code shared between mknod, mkdir, symlink and link
*/
static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct inode *dir, struct dentry *entry,
- int mode)
+ umode_t mode)
{
struct fuse_entry_out outarg;
struct inode *inode;
int err;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ if (!forget) {
fuse_put_request(fc, req);
- return PTR_ERR(forget_req);
+ return -ENOMEM;
}
memset(&outarg, 0, sizeof(outarg));
@@ -515,42 +589,30 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) {
- fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
return -ENOMEM;
}
- fuse_put_request(fc, forget_req);
-
- if (S_ISDIR(inode->i_mode)) {
- struct dentry *alias;
- mutex_lock(&fc->inst_mutex);
- alias = d_find_alias(inode);
- if (alias) {
- /* New directory must have moved since mkdir */
- mutex_unlock(&fc->inst_mutex);
- dput(alias);
- iput(inode);
- return -EBUSY;
- }
- d_instantiate(entry, inode);
- mutex_unlock(&fc->inst_mutex);
- } else
- d_instantiate(entry, inode);
+ kfree(forget);
+
+ err = d_instantiate_no_diralias(entry, inode);
+ if (err)
+ return err;
fuse_change_entry_timeout(entry, &outarg);
fuse_invalidate_attr(dir);
return 0;
out_put_forget_req:
- fuse_put_request(fc, forget_req);
+ kfree(forget);
return err;
}
-static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
dev_t rdev)
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -571,23 +633,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
return create_new_entry(fc, req, dir, entry, mode);
}
-static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
- struct nameidata *nd)
+static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
+ bool excl)
{
- if (nd && (nd->flags & LOOKUP_OPEN)) {
- int err = fuse_create_open(dir, entry, mode, nd);
- if (err != -ENOSYS)
- return err;
- /* Fall back on mknod */
- }
return fuse_mknod(dir, entry, mode, 0);
}
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -611,7 +667,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -624,11 +680,19 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
return create_new_entry(fc, req, dir, entry, S_IFLNK);
}
+static inline void fuse_update_ctime(struct inode *inode)
+{
+ if (!IS_NOCMTIME(inode)) {
+ inode->i_ctime = current_fs_time(inode->i_sb);
+ mark_inode_dirty_sync(inode);
+ }
+}
+
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -642,16 +706,23 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
fuse_put_request(fc, req);
if (!err) {
struct inode *inode = entry->d_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
/*
- * Set nlink to zero so the inode can be cleared, if the inode
- * does have more links this will be discovered at the next
- * lookup/getattr.
+ * If i_nlink == 0 then unlink doesn't make sense, yet this can
+ * happen if userspace filesystem is careless. It would be
+ * difficult to enforce correct nlink usage so just ignore this
+ * condition here
*/
- clear_nlink(inode);
+ if (inode->i_nlink > 0)
+ drop_nlink(inode);
+ spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir);
fuse_invalidate_entry_cache(entry);
+ fuse_update_ctime(inode);
} else if (err == -EINTR)
fuse_invalidate_entry(entry);
return err;
@@ -661,7 +732,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -682,22 +753,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
return err;
}
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent)
+static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags, int opcode, size_t argsize)
{
int err;
- struct fuse_rename_in inarg;
+ struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req;
+
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
- memset(&inarg, 0, sizeof(inarg));
+ memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
- req->in.h.opcode = FUSE_RENAME;
+ inarg.flags = flags;
+ req->in.h.opcode = opcode;
req->in.h.nodeid = get_node_id(olddir);
req->in.numargs = 3;
- req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].size = argsize;
req->in.args[0].value = &inarg;
req->in.args[1].size = oldent->d_name.len + 1;
req->in.args[1].value = oldent->d_name.name;
@@ -709,15 +784,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
if (!err) {
/* ctime changes */
fuse_invalidate_attr(oldent->d_inode);
+ fuse_update_ctime(oldent->d_inode);
+
+ if (flags & RENAME_EXCHANGE) {
+ fuse_invalidate_attr(newent->d_inode);
+ fuse_update_ctime(newent->d_inode);
+ }
fuse_invalidate_attr(olddir);
if (olddir != newdir)
fuse_invalidate_attr(newdir);
/* newent will end up negative */
- if (newent->d_inode) {
+ if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
fuse_invalidate_attr(newent->d_inode);
fuse_invalidate_entry_cache(newent);
+ fuse_update_ctime(newent->d_inode);
}
} else if (err == -EINTR) {
/* If request was interrupted, DEITY only knows if the
@@ -733,6 +815,42 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
return err;
}
+static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent,
+ unsigned int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ int err;
+
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (flags) {
+ if (fc->no_rename2 || fc->minor < 23)
+ return -EINVAL;
+
+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ FUSE_RENAME2,
+ sizeof(struct fuse_rename2_in));
+ if (err == -ENOSYS) {
+ fc->no_rename2 = 1;
+ err = -EINVAL;
+ }
+ } else {
+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ FUSE_RENAME,
+ sizeof(struct fuse_rename_in));
+ }
+
+ return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ return fuse_rename2(olddir, oldent, newdir, newent, 0);
+}
+
static int fuse_link(struct dentry *entry, struct inode *newdir,
struct dentry *newent)
{
@@ -740,7 +858,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -759,20 +877,42 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
will reflect changes in the backing inode (link count,
etc.)
*/
- if (!err || err == -EINTR)
+ if (!err) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ inc_nlink(inode);
+ spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ } else if (err == -EINTR) {
+ fuse_invalidate_attr(inode);
+ }
return err;
}
static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
struct kstat *stat)
{
+ unsigned int blkbits;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /* see the comment in fuse_change_attributes() */
+ if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
+ attr->size = i_size_read(inode);
+ attr->mtime = inode->i_mtime.tv_sec;
+ attr->mtimensec = inode->i_mtime.tv_nsec;
+ attr->ctime = inode->i_ctime.tv_sec;
+ attr->ctimensec = inode->i_ctime.tv_nsec;
+ }
+
stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink;
- stat->uid = attr->uid;
- stat->gid = attr->gid;
+ stat->uid = make_kuid(&init_user_ns, attr->uid);
+ stat->gid = make_kgid(&init_user_ns, attr->gid);
stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec;
@@ -782,7 +922,13 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
stat->ctime.tv_nsec = attr->ctimensec;
stat->size = attr->size;
stat->blocks = attr->blocks;
- stat->blksize = (1 << inode->i_blkbits);
+
+ if (attr->blksize != 0)
+ blkbits = ilog2(attr->blksize);
+ else
+ blkbits = inode->i_sb->s_blocksize_bits;
+
+ stat->blksize = 1 << blkbits;
}
static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
@@ -795,7 +941,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct fuse_req *req;
u64 attr_version;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -846,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
int err;
bool r;
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
err = fuse_do_getattr(inode, stat, file);
} else {
@@ -855,6 +1001,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
if (stat) {
generic_fillattr(inode, stat);
stat->mode = fi->orig_i_mode;
+ stat->ino = fi->orig_ino;
}
}
@@ -865,7 +1012,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
}
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
- struct qstr *name)
+ u64 child_nodeid, struct qstr *name)
{
int err = -ENOTDIR;
struct inode *parent;
@@ -892,8 +1039,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
fuse_invalidate_attr(parent);
fuse_invalidate_entry(entry);
+
+ if (child_nodeid != 0 && entry->d_inode) {
+ mutex_lock(&entry->d_inode->i_mutex);
+ if (get_node_id(entry->d_inode) != child_nodeid) {
+ err = -ENOENT;
+ goto badentry;
+ }
+ if (d_mountpoint(entry)) {
+ err = -EBUSY;
+ goto badentry;
+ }
+ if (S_ISDIR(entry->d_inode->i_mode)) {
+ shrink_dcache_parent(entry);
+ if (!simple_empty(entry)) {
+ err = -ENOTEMPTY;
+ goto badentry;
+ }
+ entry->d_inode->i_flags |= S_DEAD;
+ }
+ dont_mount(entry);
+ clear_nlink(entry->d_inode);
+ err = 0;
+ badentry:
+ mutex_unlock(&entry->d_inode->i_mutex);
+ if (!err)
+ d_delete(entry);
+ } else {
+ err = 0;
+ }
dput(entry);
- err = 0;
unlock:
mutex_unlock(&parent->i_mutex);
@@ -903,7 +1078,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
/*
* Calling into a user-controlled filesystem gives the filesystem
- * daemon ptrace-like capabilities over the requester process. This
+ * daemon ptrace-like capabilities over the current process. This
* means, that the filesystem daemon is able to record the exact
* filesystem operations performed, and can also control the behavior
* of the requester process in otherwise impossible ways. For example
@@ -914,27 +1089,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
* for which the owner of the mount has ptrace privilege. This
* excludes processes started by other users, suid or sgid processes.
*/
-int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+int fuse_allow_current_process(struct fuse_conn *fc)
{
const struct cred *cred;
- int ret;
if (fc->flags & FUSE_ALLOW_OTHER)
return 1;
- rcu_read_lock();
- ret = 0;
- cred = __task_cred(task);
- if (cred->euid == fc->user_id &&
- cred->suid == fc->user_id &&
- cred->uid == fc->user_id &&
- cred->egid == fc->group_id &&
- cred->sgid == fc->group_id &&
- cred->gid == fc->group_id)
- ret = 1;
- rcu_read_unlock();
+ cred = current_cred();
+ if (uid_eq(cred->euid, fc->user_id) &&
+ uid_eq(cred->suid, fc->user_id) &&
+ uid_eq(cred->uid, fc->user_id) &&
+ gid_eq(cred->egid, fc->group_id) &&
+ gid_eq(cred->sgid, fc->group_id) &&
+ gid_eq(cred->gid, fc->group_id))
+ return 1;
- return ret;
+ return 0;
}
static int fuse_access(struct inode *inode, int mask)
@@ -944,10 +1115,12 @@ static int fuse_access(struct inode *inode, int mask)
struct fuse_access_in inarg;
int err;
+ BUG_ON(mask & MAY_NOT_BLOCK);
+
if (fc->no_access)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -968,6 +1141,14 @@ static int fuse_access(struct inode *inode, int mask)
return err;
}
+static int fuse_perm_getattr(struct inode *inode, int mask)
+{
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
+ return fuse_do_getattr(inode, NULL, NULL);
+}
+
/*
* Check permission. The two basic access models of FUSE are:
*
@@ -987,7 +1168,7 @@ static int fuse_permission(struct inode *inode, int mask)
bool refreshed = false;
int err = 0;
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
/*
@@ -995,21 +1176,27 @@ static int fuse_permission(struct inode *inode, int mask)
*/
if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
- err = fuse_update_attributes(inode, NULL, NULL, &refreshed);
- if (err)
- return err;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ if (time_before64(fi->i_time, get_jiffies_64())) {
+ refreshed = true;
+
+ err = fuse_perm_getattr(inode, mask);
+ if (err)
+ return err;
+ }
}
if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
node will at first have no permissions */
if (err == -EACCES && !refreshed) {
- err = fuse_do_getattr(inode, NULL, NULL);
+ err = fuse_perm_getattr(inode, mask);
if (!err)
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask);
}
/* Note: the opposite of the above test does not
@@ -1023,7 +1210,7 @@ static int fuse_permission(struct inode *inode, int mask)
if (refreshed)
return -EACCES;
- err = fuse_do_getattr(inode, NULL, NULL);
+ err = fuse_perm_getattr(inode, mask);
if (!err && !(inode->i_mode & S_IXUGO))
return -EACCES;
}
@@ -1032,43 +1219,197 @@ static int fuse_permission(struct inode *inode, int mask)
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
- void *dstbuf, filldir_t filldir)
+ struct dir_context *ctx)
{
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
- int over;
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
- over = filldir(dstbuf, dirent->name, dirent->namelen,
- file->f_pos, dirent->ino, dirent->type);
- if (over)
+ if (!dir_emit(ctx, dirent->name, dirent->namelen,
+ dirent->ino, dirent->type))
break;
buf += reclen;
nbytes -= reclen;
- file->f_pos = dirent->off;
+ ctx->pos = dirent->off;
}
return 0;
}
-static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+static int fuse_direntplus_link(struct file *file,
+ struct fuse_direntplus *direntplus,
+ u64 attr_version)
{
int err;
+ struct fuse_entry_out *o = &direntplus->entry_out;
+ struct fuse_dirent *dirent = &direntplus->dirent;
+ struct dentry *parent = file->f_path.dentry;
+ struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
+ struct dentry *dentry;
+ struct dentry *alias;
+ struct inode *dir = parent->d_inode;
+ struct fuse_conn *fc;
+ struct inode *inode;
+
+ if (!o->nodeid) {
+ /*
+ * Unlike in the case of fuse_lookup, zero nodeid does not mean
+ * ENOENT. Instead, it only means the userspace filesystem did
+ * not want to return attributes/handle for this entry.
+ *
+ * So do nothing.
+ */
+ return 0;
+ }
+
+ if (name.name[0] == '.') {
+ /*
+ * We could potentially refresh the attributes of the directory
+ * and its parent?
+ */
+ if (name.len == 1)
+ return 0;
+ if (name.name[1] == '.' && name.len == 2)
+ return 0;
+ }
+
+ if (invalid_nodeid(o->nodeid))
+ return -EIO;
+ if (!fuse_valid_type(o->attr.mode))
+ return -EIO;
+
+ fc = get_fuse_conn(dir);
+
+ name.hash = full_name_hash(name.name, name.len);
+ dentry = d_lookup(parent, &name);
+ if (dentry) {
+ inode = dentry->d_inode;
+ if (!inode) {
+ d_drop(dentry);
+ } else if (get_node_id(inode) != o->nodeid ||
+ ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ err = d_invalidate(dentry);
+ if (err)
+ goto out;
+ } else if (is_bad_inode(inode)) {
+ err = -EIO;
+ goto out;
+ } else {
+ struct fuse_inode *fi;
+ fi = get_fuse_inode(inode);
+ spin_lock(&fc->lock);
+ fi->nlookup++;
+ spin_unlock(&fc->lock);
+
+ fuse_change_attributes(inode, &o->attr,
+ entry_attr_timeout(o),
+ attr_version);
+
+ /*
+ * The other branch to 'found' comes via fuse_iget()
+ * which bumps nlookup inside
+ */
+ goto found;
+ }
+ dput(dentry);
+ }
+
+ dentry = d_alloc(parent, &name);
+ err = -ENOMEM;
+ if (!dentry)
+ goto out;
+
+ inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
+ &o->attr, entry_attr_timeout(o), attr_version);
+ if (!inode)
+ goto out;
+
+ alias = d_materialise_unique(dentry, inode);
+ err = PTR_ERR(alias);
+ if (IS_ERR(alias))
+ goto out;
+
+ if (alias) {
+ dput(dentry);
+ dentry = alias;
+ }
+
+found:
+ if (fc->readdirplus_auto)
+ set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
+ fuse_change_entry_timeout(dentry, o);
+
+ err = 0;
+out:
+ dput(dentry);
+ return err;
+}
+
+static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
+ struct dir_context *ctx, u64 attr_version)
+{
+ struct fuse_direntplus *direntplus;
+ struct fuse_dirent *dirent;
+ size_t reclen;
+ int over = 0;
+ int ret;
+
+ while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
+ direntplus = (struct fuse_direntplus *) buf;
+ dirent = &direntplus->dirent;
+ reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
+
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
+
+ if (!over) {
+ /* We fill entries into dstbuf only as much as
+ it can hold. But we still continue iterating
+ over remaining entries to link them. If not,
+ we need to send a FORGET for each of those
+ which we did not link.
+ */
+ over = !dir_emit(ctx, dirent->name, dirent->namelen,
+ dirent->ino, dirent->type);
+ ctx->pos = dirent->off;
+ }
+
+ buf += reclen;
+ nbytes -= reclen;
+
+ ret = fuse_direntplus_link(file, direntplus, attr_version);
+ if (ret)
+ fuse_force_forget(file, direntplus->entry_out.nodeid);
+ }
+
+ return 0;
+}
+
+static int fuse_readdir(struct file *file, struct dir_context *ctx)
+{
+ int plus, err;
size_t nbytes;
struct page *page;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
+ u64 attr_version = 0;
if (is_bad_inode(inode))
return -EIO;
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1077,20 +1418,37 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
fuse_put_request(fc, req);
return -ENOMEM;
}
+
+ plus = fuse_use_readdirplus(inode, ctx);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+ req->page_descs[0].length = PAGE_SIZE;
+ if (plus) {
+ attr_version = fuse_get_attr_version(fc);
+ fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
+ FUSE_READDIRPLUS);
+ } else {
+ fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
+ FUSE_READDIR);
+ }
fuse_request_send(fc, req);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
- if (!err)
- err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
- filldir);
+ if (!err) {
+ if (plus) {
+ err = parse_dirplusfile(page_address(page), nbytes,
+ file, ctx,
+ attr_version);
+ } else {
+ err = parse_dirfile(page_address(page), nbytes, file,
+ ctx);
+ }
+ }
__free_page(page);
- fuse_invalidate_attr(inode); /* atime changed */
+ fuse_invalidate_atime(inode);
return err;
}
@@ -1098,7 +1456,7 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req = fuse_get_req_nopages(fc);
char *link;
if (IS_ERR(req))
@@ -1123,7 +1481,7 @@ static char *read_link(struct dentry *dentry)
link[req->out.args[0].size] = '\0';
out:
fuse_put_request(fc, req);
- fuse_invalidate_attr(inode); /* atime changed */
+ fuse_invalidate_atime(inode);
return link;
}
@@ -1156,17 +1514,46 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
return 0;
}
-static int fuse_dir_fsync(struct file *file, int datasync)
+static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ return fuse_fsync_common(file, start, end, datasync, 1);
+}
+
+static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+ /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
+ if (fc->minor < 18)
+ return -ENOTTY;
+
+ return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
+}
+
+static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
- return fuse_fsync_common(file, datasync, 1);
+ struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+ if (fc->minor < 18)
+ return -ENOTTY;
+
+ return fuse_ioctl_common(file, cmd, arg,
+ FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
}
-static bool update_mtime(unsigned ivalid)
+static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
/* Always update if mtime is explicitly set */
if (ivalid & ATTR_MTIME_SET)
return true;
+ /* Or if kernel i_mtime is the official one */
+ if (trust_local_mtime)
+ return true;
+
/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
return false;
@@ -1175,16 +1562,17 @@ static bool update_mtime(unsigned ivalid)
return true;
}
-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
+static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
+ bool trust_local_cmtime)
{
unsigned ivalid = iattr->ia_valid;
if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
if (ivalid & ATTR_UID)
- arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid;
+ arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
if (ivalid & ATTR_GID)
- arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid;
+ arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) {
@@ -1194,13 +1582,18 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
if (!(ivalid & ATTR_ATIME_SET))
arg->valid |= FATTR_ATIME_NOW;
}
- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
arg->valid |= FATTR_MTIME;
arg->mtime = iattr->ia_mtime.tv_sec;
arg->mtimensec = iattr->ia_mtime.tv_nsec;
- if (!(ivalid & ATTR_MTIME_SET))
+ if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
arg->valid |= FATTR_MTIME_NOW;
}
+ if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
+ arg->valid |= FATTR_CTIME;
+ arg->ctime = iattr->ia_ctime.tv_sec;
+ arg->ctimensec = iattr->ia_ctime.tv_nsec;
+ }
}
/*
@@ -1247,6 +1640,62 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+ struct inode *inode,
+ struct fuse_setattr_in *inarg_p,
+ struct fuse_attr_out *outarg_p)
+{
+ req->in.h.opcode = FUSE_SETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*inarg_p);
+ req->in.args[0].value = inarg_p;
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(*outarg_p);
+ req->out.args[0].value = outarg_p;
+}
+
+/*
+ * Flush inode->i_mtime to the server
+ */
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_setattr_in inarg;
+ struct fuse_attr_out outarg;
+ int err;
+
+ req = fuse_get_req_nopages(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+
+ inarg.valid = FATTR_MTIME;
+ inarg.mtime = inode->i_mtime.tv_sec;
+ inarg.mtimensec = inode->i_mtime.tv_nsec;
+ if (fc->minor >= 23) {
+ inarg.valid |= FATTR_CTIME;
+ inarg.ctime = inode->i_ctime.tv_sec;
+ inarg.ctimensec = inode->i_ctime.tv_nsec;
+ }
+ if (ff) {
+ inarg.valid |= FATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+
+ return err;
+}
+
/*
* Set attributes, and at the same time refresh them.
*
@@ -1255,20 +1704,19 @@ void fuse_release_nowrite(struct inode *inode)
* vmtruncate() doesn't allow for this case, so do the rlimit checking
* and the actual truncation by hand.
*/
-static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
- struct file *file)
+int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+ struct file *file)
{
- struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
+ bool is_wb = fc->writeback_cache;
loff_t oldsize;
int err;
-
- if (!fuse_allow_task(fc, current))
- return -EACCES;
+ bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
attr->ia_valid |= ATTR_FORCE;
@@ -1277,22 +1725,29 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err)
return err;
- if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
- return 0;
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
- if (is_truncate)
+ if (is_truncate) {
fuse_set_nowrite(inode);
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+ if (trust_local_cmtime && attr->ia_size != inode->i_size)
+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ }
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
- iattr_to_fattr(attr, &inarg);
+ iattr_to_fattr(attr, &inarg, trust_local_cmtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
@@ -1303,17 +1758,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
- req->in.h.opcode = FUSE_SETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
- else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -1330,10 +1775,21 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
}
spin_lock(&fc->lock);
+ /* the kernel maintains i_mtime locally */
+ if (trust_local_cmtime) {
+ if (attr->ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (attr->ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
+ /* FIXME: clear I_DIRTY_SYNC? */
+ }
+
fuse_change_attributes_common(inode, &outarg.attr,
attr_timeout(&outarg));
oldsize = inode->i_size;
- i_size_write(inode, outarg.attr.size);
+ /* see the comment in fuse_change_attributes() */
+ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
+ i_size_write(inode, outarg.attr.size);
if (is_truncate) {
/* NOTE: this may release/reacquire fc->lock */
@@ -1345,26 +1801,34 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
* Only call invalidate_inode_pages2() after removing
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
- if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
- truncate_pagecache(inode, oldsize, outarg.attr.size);
+ if ((is_truncate || !is_wb) &&
+ S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+ truncate_pagecache(inode, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
return 0;
error:
if (is_truncate)
fuse_release_nowrite(inode);
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
return err;
}
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
+ struct inode *inode = entry->d_inode;
+
+ if (!fuse_allow_current_process(get_fuse_conn(inode)))
+ return -EACCES;
+
if (attr->ia_valid & ATTR_FILE)
- return fuse_do_setattr(entry, attr, attr->ia_file);
+ return fuse_do_setattr(inode, attr, attr->ia_file);
else
- return fuse_do_setattr(entry, attr, NULL);
+ return fuse_do_setattr(inode, attr, NULL);
}
static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
@@ -1373,7 +1837,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1391,7 +1855,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1414,6 +1878,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
}
+ if (!err) {
+ fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
return err;
}
@@ -1430,7 +1898,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1476,13 +1944,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
struct fuse_getxattr_out outarg;
ssize_t ret;
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
if (fc->no_listxattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1527,7 +1995,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
if (fc->no_removexattr)
return -EOPNOTSUPP;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1543,6 +2011,10 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
fc->no_removexattr = 1;
err = -EOPNOTSUPP;
}
+ if (!err) {
+ fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
return err;
}
@@ -1553,9 +2025,11 @@ static const struct inode_operations fuse_dir_inode_operations = {
.unlink = fuse_unlink,
.rmdir = fuse_rmdir,
.rename = fuse_rename,
+ .rename2 = fuse_rename2,
.link = fuse_link,
.setattr = fuse_setattr,
.create = fuse_create,
+ .atomic_open = fuse_atomic_open,
.mknod = fuse_mknod,
.permission = fuse_permission,
.getattr = fuse_getattr,
@@ -1568,10 +2042,12 @@ static const struct inode_operations fuse_dir_inode_operations = {
static const struct file_operations fuse_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .readdir = fuse_readdir,
+ .iterate = fuse_readdir,
.open = fuse_dir_open,
.release = fuse_dir_release,
.fsync = fuse_dir_fsync,
+ .unlocked_ioctl = fuse_dir_ioctl,
+ .compat_ioctl = fuse_dir_compat_ioctl,
};
static const struct inode_operations fuse_common_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c8224587123..40ac2628ddc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,10 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/swap.h>
+#include <linux/aio.h>
+#include <linux/falloc.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -23,7 +27,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_req *req;
int err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -55,7 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
return NULL;
ff->fc = fc;
- ff->reserved_req = fuse_request_alloc();
+ ff->reserved_req = fuse_request_alloc(0);
if (unlikely(!ff->reserved_req)) {
kfree(ff);
return NULL;
@@ -85,18 +89,62 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- path_put(&req->misc.release.path);
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
}
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- req->end = fuse_release_end;
- fuse_request_send_background(ff->fc, req);
+ if (ff->fc->no_open) {
+ /*
+ * Drop the release request when client does not
+ * implement 'open'
+ */
+ req->background = 0;
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else if (sync) {
+ req->background = 0;
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ req->background = 1;
+ fuse_request_send_background(ff->fc, req);
+ }
kfree(ff);
}
}
@@ -104,36 +152,62 @@ static void fuse_file_put(struct fuse_file *ff)
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir)
{
- struct fuse_open_out outarg;
struct fuse_file *ff;
- int err;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
ff = fuse_file_alloc(fc);
if (!ff)
return -ENOMEM;
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
- if (err) {
- fuse_file_free(ff);
- return err;
+ ff->fh = 0;
+ ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
+ if (!fc->no_open || isdir) {
+ struct fuse_open_out outarg;
+ int err;
+
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ if (!err) {
+ ff->fh = outarg.fh;
+ ff->open_flags = outarg.open_flags;
+
+ } else if (err != -ENOSYS || isdir) {
+ fuse_file_free(ff);
+ return err;
+ } else {
+ fc->no_open = 1;
+ }
}
if (isdir)
- outarg.open_flags &= ~FOPEN_DIRECT_IO;
+ ff->open_flags &= ~FOPEN_DIRECT_IO;
- ff->fh = outarg.fh;
ff->nodeid = nodeid;
- ff->open_flags = outarg.open_flags;
file->private_data = fuse_file_get(ff);
return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);
+static void fuse_link_write_file(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff = file->private_data;
+ /*
+ * file may be written through mmap, so chain it onto the
+ * inodes's write_file list
+ */
+ spin_lock(&fc->lock);
+ if (list_empty(&ff->write_entry))
+ list_add(&ff->write_entry, &fi->write_files);
+ spin_unlock(&fc->lock);
+}
+
void fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = get_fuse_conn(inode);
if (ff->open_flags & FOPEN_DIRECT_IO)
file->f_op = &fuse_direct_io_file_operations;
@@ -141,28 +215,45 @@ void fuse_finish_open(struct inode *inode, struct file *file)
invalidate_inode_pages2(inode->i_mapping);
if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ i_size_write(inode, 0);
+ spin_unlock(&fc->lock);
+ fuse_invalidate_attr(inode);
+ if (fc->writeback_cache)
+ file_update_time(file);
+ }
+ if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+ fuse_link_write_file(file);
}
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
-
- /* VFS checks this, but only _after_ ->open() */
- if (file->f_flags & O_DIRECT)
- return -EINVAL;
+ bool lock_inode = (file->f_flags & O_TRUNC) &&
+ fc->atomic_o_trunc &&
+ fc->writeback_cache;
err = generic_file_open(inode, file);
if (err)
return err;
+ if (lock_inode)
+ mutex_lock(&inode->i_mutex);
+
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
- if (err)
- return err;
- fuse_finish_open(inode, file);
+ if (!err)
+ fuse_finish_open(inode, file);
+
+ if (lock_inode)
+ mutex_unlock(&inode->i_mutex);
- return 0;
+ return err;
}
static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
@@ -177,7 +268,7 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
rb_erase(&ff->polled_node, &fc->polled_files);
spin_unlock(&fc->lock);
- wake_up_interruptible_sync(&ff->poll_wait);
+ wake_up_interruptible_all(&ff->poll_wait);
inarg->fh = ff->fh;
inarg->flags = flags;
@@ -200,6 +291,12 @@ void fuse_release_common(struct file *file, int opcode)
req = ff->reserved_req;
fuse_prepare_release(ff, file->f_flags, opcode);
+ if (ff->flock) {
+ struct fuse_release_in *inarg = &req->misc.release.in;
+ inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+ inarg->lock_owner = fuse_lock_owner_id(ff->fc,
+ (fl_owner_t) file);
+ }
/* Hold vfsmount and dentry until release is finished */
path_get(&file->f_path);
req->misc.release.path = file->f_path;
@@ -208,8 +305,12 @@ void fuse_release_common(struct file *file, int opcode)
* Normally this will send the RELEASE request, however if
* some asynchronous READ or WRITE requests are outstanding,
* the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
*/
- fuse_file_put(ff);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -219,6 +320,12 @@ static int fuse_open(struct inode *inode, struct file *file)
static int fuse_release(struct inode *inode, struct file *file)
{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /* see fuse_vma_close() for !writeback_cache case */
+ if (fc->writeback_cache)
+ write_inode_now(inode, 1);
+
fuse_release_common(file, FUSE_RELEASE);
/* return value is ignored by VFS */
@@ -230,6 +337,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags)
WARN_ON(atomic_read(&ff->count) > 1);
fuse_prepare_release(ff, flags, FUSE_RELEASE);
ff->reserved_req->force = 1;
+ ff->reserved_req->background = 0;
fuse_request_send(ff->fc, ff->reserved_req);
fuse_put_request(ff->fc, ff->reserved_req);
kfree(ff);
@@ -259,12 +367,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
}
/*
- * Check if page is under writeback
+ * Check if any page in a range is under writeback
*
* This is currently done by walking the list of writepage requests
* for the inode, which can be pretty inefficient.
*/
-static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
+ pgoff_t idx_to)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -277,7 +386,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (curr_index == index) {
+ if (idx_from < curr_index + req->num_pages &&
+ curr_index <= idx_to) {
found = true;
break;
}
@@ -287,6 +397,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
return found;
}
+static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+{
+ return fuse_range_is_writeback(inode, index, index);
+}
+
/*
* Wait for page writeback to be completed.
*
@@ -301,9 +416,24 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
return 0;
}
+/*
+ * Wait for all pending writepages on the inode to finish.
+ *
+ * This is currently done by blocking further writes with FUSE_NOWRITE
+ * and waiting for all sent writes to complete.
+ *
+ * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
+ * could conflict with truncation.
+ */
+static void fuse_sync_writes(struct inode *inode)
+{
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+}
+
static int fuse_flush(struct file *file, fl_owner_t id)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_req *req;
@@ -316,7 +446,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (fc->no_flush)
return 0;
- req = fuse_get_req_nofail(fc, file);
+ err = write_inode_now(inode, 1);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
+ fuse_sync_writes(inode);
+ mutex_unlock(&inode->i_mutex);
+
+ req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -336,22 +474,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err;
}
-/*
- * Wait for all pending writepages on the inode to finish.
- *
- * This is currently done by blocking further writes with FUSE_NOWRITE
- * and waiting for all sent writes to complete.
- *
- * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
- * could conflict with truncation.
- */
-static void fuse_sync_writes(struct inode *inode)
-{
- fuse_set_nowrite(inode);
- fuse_release_nowrite(inode);
-}
-
-int fuse_fsync_common(struct file *file, int datasync, int isdir)
+int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ int datasync, int isdir)
{
struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -363,23 +487,30 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
if (is_bad_inode(inode))
return -EIO;
- if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
- return 0;
+ mutex_lock(&inode->i_mutex);
/*
* Start writeback against all dirty pages of the inode, then
* wait for all outstanding writes, before sending the FSYNC
* request.
*/
- err = write_inode_now(inode, 0);
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
- return err;
+ goto out;
fuse_sync_writes(inode);
+ err = sync_inode_metadata(inode, 1);
+ if (err)
+ goto out;
- req = fuse_get_req(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+ goto out;
+
+ req = fuse_get_req_nopages(fc);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -399,12 +530,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
fc->no_fsync = 1;
err = 0;
}
+out:
+ mutex_unlock(&inode->i_mutex);
return err;
}
-static int fuse_fsync(struct file *file, int datasync)
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
- return fuse_fsync_common(file, datasync, 0);
+ return fuse_fsync_common(file, start, end, datasync, 0);
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -427,9 +561,114 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
req->out.args[0].size = count;
}
-static size_t fuse_send_read(struct fuse_req *req, struct file *file,
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ if (write)
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+}
+
+/**
+ * In case of short read, the caller sets 'pos' to the position of
+ * actual end of fuse request in IO request. Otherwise, if bytes_requested
+ * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
+ *
+ * An example:
+ * User requested DIO read of 64K. It was splitted into two 32K fuse requests,
+ * both submitted asynchronously. The first of them was ACKed by userspace as
+ * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
+ * second request was ACKed as short, e.g. only 1K was read, resulting in
+ * pos == 33K.
+ *
+ * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
+ * will be equal to the length of the longest contiguous fragment of
+ * transferred data starting from the beginning of IO request.
+ */
+static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
+{
+ int left;
+
+ spin_lock(&io->lock);
+ if (err)
+ io->err = io->err ? : err;
+ else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
+ io->bytes = pos;
+
+ left = --io->reqs;
+ spin_unlock(&io->lock);
+
+ if (!left) {
+ long res;
+
+ if (io->err)
+ res = io->err;
+ else if (io->bytes >= 0 && io->write)
+ res = -EIO;
+ else {
+ res = io->bytes < 0 ? io->size : io->bytes;
+
+ if (!is_sync_kiocb(io->iocb)) {
+ struct inode *inode = file_inode(io->iocb->ki_filp);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ spin_unlock(&fc->lock);
+ }
+ }
+
+ aio_complete(io->iocb, res, 0);
+ kfree(io);
+ }
+}
+
+static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct fuse_io_priv *io = req->io;
+ ssize_t pos = -1;
+
+ fuse_release_user_pages(req, !io->write);
+
+ if (io->write) {
+ if (req->misc.write.in.size != req->misc.write.out.size)
+ pos = req->misc.write.in.offset - io->offset +
+ req->misc.write.out.size;
+ } else {
+ if (req->misc.read.in.size != req->out.args[0].size)
+ pos = req->misc.read.in.offset - io->offset +
+ req->out.args[0].size;
+ }
+
+ fuse_aio_complete(io, req->out.h.error, pos);
+}
+
+static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
+ size_t num_bytes, struct fuse_io_priv *io)
+{
+ spin_lock(&io->lock);
+ io->size += num_bytes;
+ io->reqs++;
+ spin_unlock(&io->lock);
+
+ req->io = io;
+ req->end = fuse_aio_complete_req;
+
+ __fuse_get_request(req);
+ fuse_request_send_background(fc, req);
+
+ return num_bytes;
+}
+
+static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
loff_t pos, size_t count, fl_owner_t owner)
{
+ struct file *file = io->file;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
@@ -440,6 +679,10 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
inarg->read_flags |= FUSE_READ_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
}
+
+ if (io->async)
+ return fuse_async_req_send(fc, req, count, io);
+
fuse_request_send(fc, req);
return req->out.args[0].size;
}
@@ -451,15 +694,43 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
- if (attr_ver == fi->attr_version && size < inode->i_size) {
+ if (attr_ver == fi->attr_version && size < inode->i_size &&
+ !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
fi->attr_version = ++fc->attr_version;
i_size_write(inode, size);
}
spin_unlock(&fc->lock);
}
-static int fuse_readpage(struct file *file, struct page *page)
+static void fuse_short_read(struct fuse_req *req, struct inode *inode,
+ u64 attr_ver)
{
+ size_t num_read = req->out.args[0].size;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (fc->writeback_cache) {
+ /*
+ * A hole in a file. Some data after the hole are in page cache,
+ * but have not reached the client fs yet. So, the hole is not
+ * present there.
+ */
+ int i;
+ int start_idx = num_read >> PAGE_CACHE_SHIFT;
+ size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+
+ for (i = start_idx; i < req->num_pages; i++) {
+ zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
+ off = 0;
+ }
+ } else {
+ loff_t pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos, attr_ver);
+ }
+}
+
+static int fuse_do_readpage(struct file *file, struct page *page)
+{
+ struct fuse_io_priv io = { .async = 0, .file = file };
struct inode *inode = page->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
@@ -469,21 +740,16 @@ static int fuse_readpage(struct file *file, struct page *page)
u64 attr_ver;
int err;
- err = -EIO;
- if (is_bad_inode(inode))
- goto out;
-
/*
- * Page writeback can extend beyond the liftime of the
+ * Page writeback can extend beyond the lifetime of the
* page-cache page, so make sure we read a properly synced
* page.
*/
fuse_wait_on_page_writeback(inode, page->index);
- req = fuse_get_req(fc);
- err = PTR_ERR(req);
+ req = fuse_get_req(fc, 1);
if (IS_ERR(req))
- goto out;
+ return PTR_ERR(req);
attr_ver = fuse_get_attr_version(fc);
@@ -491,21 +757,36 @@ static int fuse_readpage(struct file *file, struct page *page)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- num_read = fuse_send_read(req, file, pos, count, NULL);
+ req->page_descs[0].length = count;
+ num_read = fuse_send_read(req, &io, pos, count, NULL);
err = req->out.h.error;
- fuse_put_request(fc, req);
if (!err) {
/*
* Short read means EOF. If file size is larger, truncate it
*/
if (num_read < count)
- fuse_read_update_size(inode, pos + num_read, attr_ver);
+ fuse_short_read(req, inode, attr_ver);
SetPageUptodate(page);
}
- fuse_invalidate_attr(inode); /* atime changed */
+ fuse_put_request(fc, req);
+
+ return err;
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ err = fuse_do_readpage(file, page);
+ fuse_invalidate_atime(inode);
out:
unlock_page(page);
return err;
@@ -527,14 +808,10 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
/*
* Short read means EOF. If file size is larger, truncate it
*/
- if (!req->out.h.error && num_read < count) {
- loff_t pos;
+ if (!req->out.h.error && num_read < count)
+ fuse_short_read(req, inode, req->misc.read.attr_ver);
- pos = page_offset(req->pages[0]) + num_read;
- fuse_read_update_size(inode, pos,
- req->misc.read.attr_ver);
- }
- fuse_invalidate_attr(inode); /* atime changed */
+ fuse_invalidate_atime(inode);
}
for (i = 0; i < req->num_pages; i++) {
@@ -547,7 +824,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
page_cache_release(page);
}
if (req->ff)
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -577,6 +854,7 @@ struct fuse_fill_data {
struct fuse_req *req;
struct file *file;
struct inode *inode;
+ unsigned nr_pages;
};
static int fuse_readpages_fill(void *_data, struct page *page)
@@ -592,16 +870,31 @@ static int fuse_readpages_fill(void *_data, struct page *page)
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+ int nr_alloc = min_t(unsigned, data->nr_pages,
+ FUSE_MAX_PAGES_PER_REQ);
fuse_send_readpages(req, data->file);
- data->req = req = fuse_get_req(fc);
+ if (fc->async_read)
+ req = fuse_get_req_for_background(fc, nr_alloc);
+ else
+ req = fuse_get_req(fc, nr_alloc);
+
+ data->req = req;
if (IS_ERR(req)) {
unlock_page(page);
return PTR_ERR(req);
}
}
+
+ if (WARN_ON(req->num_pages >= req->max_pages)) {
+ fuse_put_request(fc, req);
+ return -EIO;
+ }
+
page_cache_get(page);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = PAGE_SIZE;
req->num_pages++;
+ data->nr_pages--;
return 0;
}
@@ -612,6 +905,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data;
int err;
+ int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
err = -EIO;
if (is_bad_inode(inode))
@@ -619,7 +913,11 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
data.file = file;
data.inode = inode;
- data.req = fuse_get_req(fc);
+ if (fc->async_read)
+ data.req = fuse_get_req_for_background(fc, nr_alloc);
+ else
+ data.req = fuse_get_req(fc, nr_alloc);
+ data.nr_pages = nr_pages;
err = PTR_ERR(data.req);
if (IS_ERR(data.req))
goto out;
@@ -635,23 +933,25 @@ out:
return err;
}
-static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
- if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) {
+ /*
+ * In auto invalidate mode, always update attributes on read.
+ * Otherwise, only update if we attempt to read past EOF (to ensure
+ * i_size is up to date).
+ */
+ if (fc->auto_inval_data ||
+ (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
int err;
- /*
- * If trying to read past EOF, make sure the i_size
- * attribute is up-to-date.
- */
err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
if (err)
return err;
}
- return generic_file_aio_read(iocb, iov, nr_segs, pos);
+ return generic_file_read_iter(iocb, to);
}
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -677,9 +977,10 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
req->out.args[0].value = outarg;
}
-static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
loff_t pos, size_t count, fl_owner_t owner)
{
+ struct file *file = io->file;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
struct fuse_write_in *inarg = &req->misc.write.in;
@@ -690,88 +991,29 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
}
- fuse_request_send(fc, req);
- return req->misc.write.out.size;
-}
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ if (io->async)
+ return fuse_async_req_send(fc, req, count, io);
- *pagep = grab_cache_page_write_begin(mapping, index, flags);
- if (!*pagep)
- return -ENOMEM;
- return 0;
+ fuse_request_send(fc, req);
+ return req->misc.write.out.size;
}
-void fuse_write_update_size(struct inode *inode, loff_t pos)
+bool fuse_write_update_size(struct inode *inode, loff_t pos)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ bool ret = false;
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- if (pos > inode->i_size)
+ if (pos > inode->i_size) {
i_size_write(inode, pos);
- spin_unlock(&fc->lock);
-}
-
-static int fuse_buffered_write(struct file *file, struct inode *inode,
- loff_t pos, unsigned count, struct page *page)
-{
- int err;
- size_t nres;
- struct fuse_conn *fc = get_fuse_conn(inode);
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- struct fuse_req *req;
-
- if (is_bad_inode(inode))
- return -EIO;
-
- /*
- * Make sure writepages on the same page are not mixed up with
- * plain writes.
- */
- fuse_wait_on_page_writeback(inode, page->index);
-
- req = fuse_get_req(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.argpages = 1;
- req->num_pages = 1;
- req->pages[0] = page;
- req->page_offset = offset;
- nres = fuse_send_write(req, file, pos, count, NULL);
- err = req->out.h.error;
- fuse_put_request(fc, req);
- if (!err && !nres)
- err = -EIO;
- if (!err) {
- pos += nres;
- fuse_write_update_size(inode, pos);
- if (count == PAGE_CACHE_SIZE)
- SetPageUptodate(page);
+ ret = true;
}
- fuse_invalidate_attr(inode);
- return err ? err : nres;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = mapping->host;
- int res = 0;
-
- if (copied)
- res = fuse_buffered_write(file, inode, pos, copied, page);
+ spin_unlock(&fc->lock);
- unlock_page(page);
- page_cache_release(page);
- return res;
+ return ret;
}
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
@@ -781,13 +1023,14 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
size_t res;
unsigned offset;
unsigned i;
+ struct fuse_io_priv io = { .async = 0, .file = file };
for (i = 0; i < req->num_pages; i++)
fuse_wait_on_page_writeback(inode, req->pages[i]->index);
- res = fuse_send_write(req, file, pos, count, NULL);
+ res = fuse_send_write(req, &io, pos, count, NULL);
- offset = req->page_offset;
+ offset = req->page_descs[0].offset;
count = res;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
@@ -818,7 +1061,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
int err;
req->in.argpages = 1;
- req->page_offset = offset;
+ req->page_descs[0].offset = offset;
do {
size_t tmp;
@@ -842,9 +1085,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- pagefault_disable();
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
- pagefault_enable();
flush_dcache_page(page);
if (!tmp) {
@@ -856,6 +1097,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
err = 0;
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = tmp;
req->num_pages++;
iov_iter_advance(ii, tmp);
@@ -868,28 +1110,41 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
- req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+ req->num_pages < req->max_pages && offset == 0);
return count > 0 ? count : err;
}
+static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
+{
+ return min_t(unsigned,
+ ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
+ (pos >> PAGE_CACHE_SHIFT) + 1,
+ FUSE_MAX_PAGES_PER_REQ);
+}
+
static ssize_t fuse_perform_write(struct file *file,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
ssize_t res = 0;
if (is_bad_inode(inode))
return -EIO;
+ if (inode->i_size < pos + iov_iter_count(ii))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
do {
struct fuse_req *req;
ssize_t count;
+ unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
break;
@@ -919,30 +1174,34 @@ static ssize_t fuse_perform_write(struct file *file,
if (res > 0)
fuse_write_update_size(inode, pos);
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
fuse_invalidate_attr(inode);
return res > 0 ? res : err;
}
-static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count = 0;
+ size_t count = iov_iter_count(from);
ssize_t written = 0;
+ ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
- struct iov_iter i;
+ loff_t endbyte = 0;
+ loff_t pos = iocb->ki_pos;
- WARN_ON(iocb->ki_pos != pos);
+ if (get_fuse_conn(inode)->writeback_cache) {
+ /* Update size (EOF optimization) and mode (SUID clearing) */
+ err = fuse_update_attributes(mapping->host, NULL, file, NULL);
+ if (err)
+ return err;
- err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
- if (err)
- return err;
+ return generic_file_write_iter(iocb, from);
+ }
mutex_lock(&inode->i_mutex);
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
@@ -954,17 +1213,45 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
+ iov_iter_truncate(from, count);
err = file_remove_suid(file);
if (err)
goto out;
- file_update_time(file);
+ err = file_update_time(file);
+ if (err)
+ goto out;
+
+ if (file->f_flags & O_DIRECT) {
+ written = generic_file_direct_write(iocb, from, pos);
+ if (written < 0 || !iov_iter_count(from))
+ goto out;
+
+ pos += written;
+
+ written_buffered = fuse_perform_write(file, mapping, from, pos);
+ if (written_buffered < 0) {
+ err = written_buffered;
+ goto out;
+ }
+ endbyte = pos + written_buffered - 1;
+
+ err = filemap_write_and_wait_range(file->f_mapping, pos,
+ endbyte);
+ if (err)
+ goto out;
- iov_iter_init(&i, iov, nr_segs, count, 0);
- written = fuse_perform_write(file, mapping, &i, pos);
- if (written >= 0)
- iocb->ki_pos = pos + written;
+ invalidate_mapping_pages(file->f_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ endbyte >> PAGE_CACHE_SHIFT);
+ written += written_buffered;
+ iocb->ki_pos = pos + written_buffered;
+ } else {
+ written = fuse_perform_write(file, mapping, from, pos);
+ if (written >= 0)
+ iocb->ki_pos = pos + written;
+ }
out:
current->backing_dev_info = NULL;
mutex_unlock(&inode->i_mutex);
@@ -972,87 +1259,135 @@ out:
return written ? written : err;
}
-static void fuse_release_user_pages(struct fuse_req *req, int write)
+static inline void fuse_page_descs_length_init(struct fuse_req *req,
+ unsigned index, unsigned nr_pages)
{
- unsigned i;
+ int i;
- for (i = 0; i < req->num_pages; i++) {
- struct page *page = req->pages[i];
- if (write)
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for (i = index; i < index + nr_pages; i++)
+ req->page_descs[i].length = PAGE_SIZE -
+ req->page_descs[i].offset;
+}
+
+static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
+{
+ return (unsigned long)ii->iov->iov_base + ii->iov_offset;
+}
+
+static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
+ size_t max_size)
+{
+ return min(iov_iter_single_seg_count(ii), max_size);
}
-static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t *nbytesp, int write)
{
- size_t nbytes = *nbytesp;
- unsigned long user_addr = (unsigned long) buf;
- unsigned offset = user_addr & ~PAGE_MASK;
- int npages;
+ size_t nbytes = 0; /* # bytes already packed in req */
/* Special case for kernel I/O: can copy directly into the buffer */
- if (segment_eq(get_fs(), KERNEL_DS)) {
+ if (ii->type & ITER_KVEC) {
+ unsigned long user_addr = fuse_get_user_addr(ii);
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+
if (write)
req->in.args[1].value = (void *) user_addr;
else
req->out.args[0].value = (void *) user_addr;
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
return 0;
}
- nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
- npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
- npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
- if (npages < 0)
- return npages;
+ while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
+ unsigned npages;
+ size_t start;
+ unsigned n = req->max_pages - req->num_pages;
+ ssize_t ret = iov_iter_get_pages(ii,
+ &req->pages[req->num_pages],
+ n * PAGE_SIZE, &start);
+ if (ret < 0)
+ return ret;
+
+ iov_iter_advance(ii, ret);
+ nbytes += ret;
+
+ ret += start;
+ npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
- req->num_pages = npages;
- req->page_offset = offset;
+ req->page_descs[req->num_pages].offset = start;
+ fuse_page_descs_length_init(req, req->num_pages, npages);
+
+ req->num_pages += npages;
+ req->page_descs[req->num_pages - 1].length -=
+ (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
+ }
if (write)
req->in.argpages = 1;
else
req->out.argpages = 1;
- nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
- *nbytesp = min(*nbytesp, nbytes);
+ *nbytesp = nbytes;
return 0;
}
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write)
+static inline int fuse_iter_npages(const struct iov_iter *ii_p)
{
+ return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
+}
+
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ loff_t *ppos, int flags)
+{
+ int write = flags & FUSE_DIO_WRITE;
+ int cuse = flags & FUSE_DIO_CUSE;
+ struct file *file = io->file;
+ struct inode *inode = file->f_mapping->host;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
+ size_t count = iov_iter_count(iter);
+ pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
ssize_t res = 0;
struct fuse_req *req;
- req = fuse_get_req(fc);
+ if (io->async)
+ req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
+ else
+ req = fuse_get_req(fc, fuse_iter_npages(iter));
if (IS_ERR(req))
return PTR_ERR(req);
+ if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+ if (!write)
+ mutex_lock(&inode->i_mutex);
+ fuse_sync_writes(inode);
+ if (!write)
+ mutex_unlock(&inode->i_mutex);
+ }
+
while (count) {
size_t nres;
fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
- int err = fuse_get_user_pages(req, buf, &nbytes, write);
+ int err = fuse_get_user_pages(req, iter, &nbytes, write);
if (err) {
res = err;
break;
}
if (write)
- nres = fuse_send_write(req, file, pos, nbytes, owner);
+ nres = fuse_send_write(req, io, pos, nbytes, owner);
else
- nres = fuse_send_read(req, file, pos, nbytes, owner);
+ nres = fuse_send_read(req, io, pos, nbytes, owner);
- fuse_release_user_pages(req, !write);
+ if (!io->async)
+ fuse_release_user_pages(req, !write);
if (req->out.h.error) {
if (!res)
res = req->out.h.error;
@@ -1064,12 +1399,15 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
count -= nres;
res += nres;
pos += nres;
- buf += nres;
if (nres != nbytes)
break;
if (count) {
fuse_put_request(fc, req);
- req = fuse_get_req(fc);
+ if (io->async)
+ req = fuse_get_req_for_background(fc,
+ fuse_iter_npages(iter));
+ else
+ req = fuse_get_req(fc, fuse_iter_npages(iter));
if (IS_ERR(req))
break;
}
@@ -1083,16 +1421,49 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
-static ssize_t fuse_direct_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
+ struct iov_iter *iter,
+ loff_t *ppos)
{
ssize_t res;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct file *file = io->file;
+ struct inode *inode = file_inode(file);
if (is_bad_inode(inode))
return -EIO;
- res = fuse_direct_io(file, buf, count, ppos, 0);
+ res = fuse_direct_io(io, iter, ppos, 0);
+
+ fuse_invalidate_attr(inode);
+
+ return res;
+}
+
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iovec iov = { .iov_base = buf, .iov_len = count };
+ struct iov_iter ii;
+ iov_iter_init(&ii, READ, &iov, 1, count);
+ return __fuse_direct_read(&io, &ii, ppos);
+}
+
+static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
+ struct iov_iter *iter,
+ loff_t *ppos)
+{
+ struct file *file = io->file;
+ struct inode *inode = file_inode(file);
+ size_t count = iov_iter_count(iter);
+ ssize_t res;
+
+
+ res = generic_write_checks(file, ppos, &count, 0);
+ if (!res) {
+ iov_iter_truncate(iter, count);
+ res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
+ }
fuse_invalidate_attr(inode);
@@ -1102,31 +1473,35 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
+ struct inode *inode = file_inode(file);
ssize_t res;
+ struct fuse_io_priv io = { .async = 0, .file = file };
+ struct iov_iter ii;
+ iov_iter_init(&ii, WRITE, &iov, 1, count);
if (is_bad_inode(inode))
return -EIO;
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
- res = generic_write_checks(file, ppos, &count, 0);
- if (!res) {
- res = fuse_direct_io(file, buf, count, ppos, 1);
- if (res > 0)
- fuse_write_update_size(inode, *ppos);
- }
+ res = __fuse_direct_write(&io, &ii, ppos);
+ if (res > 0)
+ fuse_write_update_size(inode, *ppos);
mutex_unlock(&inode->i_mutex);
- fuse_invalidate_attr(inode);
-
return res;
}
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
- __free_page(req->pages[0]);
- fuse_file_put(req->ff);
+ int i;
+
+ for (i = 0; i < req->num_pages; i++)
+ __free_page(req->pages[i]);
+
+ if (req->ff)
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1134,30 +1509,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
struct inode *inode = req->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+ int i;
list_del(&req->writepages_entry);
- dec_bdi_stat(bdi, BDI_WRITEBACK);
- dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
- bdi_writeout_inc(bdi);
+ for (i = 0; i < req->num_pages; i++) {
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ }
wake_up(&fi->page_waitq);
}
/* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
+ loff_t size)
__releases(fc->lock)
__acquires(fc->lock)
{
struct fuse_inode *fi = get_fuse_inode(req->inode);
- loff_t size = i_size_read(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
+ __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
if (!fc->connected)
goto out_free;
- if (inarg->offset + PAGE_CACHE_SIZE <= size) {
- inarg->size = PAGE_CACHE_SIZE;
+ if (inarg->offset + data_size <= size) {
+ inarg->size = data_size;
} else if (inarg->offset < size) {
- inarg->size = size & (PAGE_CACHE_SIZE - 1);
+ inarg->size = size - inarg->offset;
} else {
/* Got truncated off completely */
goto out_free;
@@ -1188,12 +1567,13 @@ __acquires(fc->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ size_t crop = i_size_read(inode);
struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
list_del_init(&req->list);
- fuse_send_writepage(fc, req);
+ fuse_send_writepage(fc, req, crop);
}
}
@@ -1204,12 +1584,85 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
mapping_set_error(inode->i_mapping, req->out.h.error);
spin_lock(&fc->lock);
+ while (req->misc.write.next) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_write_in *inarg = &req->misc.write.in;
+ struct fuse_req *next = req->misc.write.next;
+ req->misc.write.next = next->misc.write.next;
+ next->misc.write.next = NULL;
+ next->ff = fuse_file_get(req->ff);
+ list_add(&next->writepages_entry, &fi->writepages);
+
+ /*
+ * Skip fuse_flush_writepages() to make it easy to crop requests
+ * based on primary request size.
+ *
+ * 1st case (trivial): there are no concurrent activities using
+ * fuse_set/release_nowrite. Then we're on safe side because
+ * fuse_flush_writepages() would call fuse_send_writepage()
+ * anyway.
+ *
+ * 2nd case: someone called fuse_set_nowrite and it is waiting
+ * now for completion of all in-flight requests. This happens
+ * rarely and no more than once per page, so this should be
+ * okay.
+ *
+ * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
+ * of fuse_set_nowrite..fuse_release_nowrite section. The fact
+ * that fuse_set_nowrite returned implies that all in-flight
+ * requests were completed along with all of their secondary
+ * requests. Further primary requests are blocked by negative
+ * writectr. Hence there cannot be any in-flight requests and
+ * no invocations of fuse_writepage_end() while we're in
+ * fuse_set_nowrite..fuse_release_nowrite section.
+ */
+ fuse_send_writepage(fc, next, inarg->offset + inarg->size);
+ }
fi->writectr--;
fuse_writepage_finish(fc, req);
spin_unlock(&fc->lock);
fuse_writepage_free(fc, req);
}
+static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
+ struct fuse_inode *fi)
+{
+ struct fuse_file *ff = NULL;
+
+ spin_lock(&fc->lock);
+ if (!list_empty(&fi->write_files)) {
+ ff = list_entry(fi->write_files.next, struct fuse_file,
+ write_entry);
+ fuse_file_get(ff);
+ }
+ spin_unlock(&fc->lock);
+
+ return ff;
+}
+
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+ struct fuse_inode *fi)
+{
+ struct fuse_file *ff = __fuse_write_file_get(fc, fi);
+ WARN_ON(!ff);
+ return ff;
+}
+
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff;
+ int err;
+
+ ff = __fuse_write_file_get(fc, fi);
+ err = fuse_flush_times(inode, ff);
+ if (ff)
+ fuse_file_put(ff, 0);
+
+ return err;
+}
+
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -1217,39 +1670,40 @@ static int fuse_writepage_locked(struct page *page)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
- struct fuse_file *ff;
struct page *tmp_page;
+ int error = -ENOMEM;
set_page_writeback(page);
- req = fuse_request_alloc_nofs();
+ req = fuse_request_alloc_nofs(1);
if (!req)
goto err;
+ req->background = 1; /* writeback always goes to bg_queue */
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!tmp_page)
goto err_free;
- spin_lock(&fc->lock);
- BUG_ON(list_empty(&fi->write_files));
- ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
- req->ff = fuse_file_get(ff);
- spin_unlock(&fc->lock);
+ error = -EIO;
+ req->ff = fuse_write_file_get(fc, fi);
+ if (!req->ff)
+ goto err_nofile;
- fuse_write_fill(req, ff, page_offset(page), 0);
+ fuse_write_fill(req, req->ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+ req->misc.write.next = NULL;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
- req->page_offset = 0;
+ req->page_descs[0].offset = 0;
+ req->page_descs[0].length = PAGE_SIZE;
req->end = fuse_writepage_end;
req->inode = inode;
inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
- end_page_writeback(page);
spin_lock(&fc->lock);
list_add(&req->writepages_entry, &fi->writepages);
@@ -1257,25 +1711,344 @@ static int fuse_writepage_locked(struct page *page)
fuse_flush_writepages(inode);
spin_unlock(&fc->lock);
+ end_page_writeback(page);
+
return 0;
+err_nofile:
+ __free_page(tmp_page);
err_free:
fuse_request_free(req);
err:
end_page_writeback(page);
- return -ENOMEM;
+ return error;
}
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
int err;
+ if (fuse_page_is_writeback(page->mapping->host, page->index)) {
+ /*
+ * ->writepages() should be called for sync() and friends. We
+ * should only get here on direct reclaim and then we are
+ * allowed to skip a page which is already in flight
+ */
+ WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
+
+ redirty_page_for_writepage(wbc, page);
+ return 0;
+ }
+
err = fuse_writepage_locked(page);
unlock_page(page);
return err;
}
+struct fuse_fill_wb_data {
+ struct fuse_req *req;
+ struct fuse_file *ff;
+ struct inode *inode;
+ struct page **orig_pages;
+};
+
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+{
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int num_pages = req->num_pages;
+ int i;
+
+ req->ff = fuse_file_get(data->ff);
+ spin_lock(&fc->lock);
+ list_add_tail(&req->list, &fi->queued_writes);
+ fuse_flush_writepages(inode);
+ spin_unlock(&fc->lock);
+
+ for (i = 0; i < num_pages; i++)
+ end_page_writeback(data->orig_pages[i]);
+}
+
+static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+ struct page *page)
+{
+ struct fuse_conn *fc = get_fuse_conn(new_req->inode);
+ struct fuse_inode *fi = get_fuse_inode(new_req->inode);
+ struct fuse_req *tmp;
+ struct fuse_req *old_req;
+ bool found = false;
+ pgoff_t curr_index;
+
+ BUG_ON(new_req->num_pages != 0);
+
+ spin_lock(&fc->lock);
+ list_del(&new_req->writepages_entry);
+ list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
+ BUG_ON(old_req->inode != new_req->inode);
+ curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (curr_index <= page->index &&
+ page->index < curr_index + old_req->num_pages) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ list_add(&new_req->writepages_entry, &fi->writepages);
+ goto out_unlock;
+ }
+
+ new_req->num_pages = 1;
+ for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
+ BUG_ON(tmp->inode != new_req->inode);
+ curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (tmp->num_pages == 1 &&
+ curr_index == page->index) {
+ old_req = tmp;
+ }
+ }
+
+ if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
+ old_req->state == FUSE_REQ_PENDING)) {
+ struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+
+ copy_highpage(old_req->pages[0], page);
+ spin_unlock(&fc->lock);
+
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ fuse_writepage_free(fc, new_req);
+ fuse_request_free(new_req);
+ goto out;
+ } else {
+ new_req->misc.write.next = old_req->misc.write.next;
+ old_req->misc.write.next = new_req;
+ }
+out_unlock:
+ spin_unlock(&fc->lock);
+out:
+ return found;
+}
+
+static int fuse_writepages_fill(struct page *page,
+ struct writeback_control *wbc, void *_data)
+{
+ struct fuse_fill_wb_data *data = _data;
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct page *tmp_page;
+ bool is_writeback;
+ int err;
+
+ if (!data->ff) {
+ err = -EIO;
+ data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+ if (!data->ff)
+ goto out_unlock;
+ }
+
+ /*
+ * Being under writeback is unlikely but possible. For example direct
+ * read to an mmaped fuse file will set the page dirty twice; once when
+ * the pages are faulted with get_user_pages(), and then after the read
+ * completed.
+ */
+ is_writeback = fuse_page_is_writeback(inode, page->index);
+
+ if (req && req->num_pages &&
+ (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+ data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+ fuse_writepages_send(data);
+ data->req = NULL;
+ }
+ err = -ENOMEM;
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto out_unlock;
+
+ /*
+ * The page must not be redirtied until the writeout is completed
+ * (i.e. userspace has sent a reply to the write request). Otherwise
+ * there could be more than one temporary page instance for each real
+ * page.
+ *
+ * This is ensured by holding the page lock in page_mkwrite() while
+ * checking fuse_page_is_writeback(). We already hold the page lock
+ * since clear_page_dirty_for_io() and keep it held until we add the
+ * request to the fi->writepages list and increment req->num_pages.
+ * After this fuse_page_is_writeback() will indicate that the page is
+ * under writeback, so we can release the page lock.
+ */
+ if (data->req == NULL) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ err = -ENOMEM;
+ req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+ if (!req) {
+ __free_page(tmp_page);
+ goto out_unlock;
+ }
+
+ fuse_write_fill(req, data->ff, page_offset(page), 0);
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+ req->misc.write.next = NULL;
+ req->in.argpages = 1;
+ req->background = 1;
+ req->num_pages = 0;
+ req->end = fuse_writepage_end;
+ req->inode = inode;
+
+ spin_lock(&fc->lock);
+ list_add(&req->writepages_entry, &fi->writepages);
+ spin_unlock(&fc->lock);
+
+ data->req = req;
+ }
+ set_page_writeback(page);
+
+ copy_highpage(tmp_page, page);
+ req->pages[req->num_pages] = tmp_page;
+ req->page_descs[req->num_pages].offset = 0;
+ req->page_descs[req->num_pages].length = PAGE_SIZE;
+
+ inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
+ inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+
+ err = 0;
+ if (is_writeback && fuse_writepage_in_flight(req, page)) {
+ end_page_writeback(page);
+ data->req = NULL;
+ goto out_unlock;
+ }
+ data->orig_pages[req->num_pages] = page;
+
+ /*
+ * Protected by fc->lock against concurrent access by
+ * fuse_page_is_writeback().
+ */
+ spin_lock(&fc->lock);
+ req->num_pages++;
+ spin_unlock(&fc->lock);
+
+out_unlock:
+ unlock_page(page);
+
+ return err;
+}
+
+static int fuse_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_fill_wb_data data;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ data.inode = inode;
+ data.req = NULL;
+ data.ff = NULL;
+
+ err = -ENOMEM;
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ sizeof(struct page *),
+ GFP_NOFS);
+ if (!data.orig_pages)
+ goto out;
+
+ err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
+ if (data.req) {
+ /* Ignore errors if we can write at least one page */
+ BUG_ON(!data.req->num_pages);
+ fuse_writepages_send(&data);
+ err = 0;
+ }
+ if (data.ff)
+ fuse_file_put(data.ff, false);
+
+ kfree(data.orig_pages);
+out:
+ return err;
+}
+
+/*
+ * It's worthy to make sure that space is reserved on disk for the write,
+ * but how to implement it without killing performance need more thinking.
+ */
+static int fuse_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
+ struct page *page;
+ loff_t fsize;
+ int err = -ENOMEM;
+
+ WARN_ON(!fc->writeback_cache);
+
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ goto error;
+
+ fuse_wait_on_page_writeback(mapping->host, page->index);
+
+ if (PageUptodate(page) || len == PAGE_CACHE_SIZE)
+ goto success;
+ /*
+ * Check if the start this page comes after the end of file, in which
+ * case the readpage can be optimized away.
+ */
+ fsize = i_size_read(mapping->host);
+ if (fsize <= (pos & PAGE_CACHE_MASK)) {
+ size_t off = pos & ~PAGE_CACHE_MASK;
+ if (off)
+ zero_user_segment(page, 0, off);
+ goto success;
+ }
+ err = fuse_do_readpage(file, page);
+ if (err)
+ goto cleanup;
+success:
+ *pagep = page;
+ return 0;
+
+cleanup:
+ unlock_page(page);
+ page_cache_release(page);
+error:
+ return err;
+}
+
+static int fuse_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+
+ if (!PageUptodate(page)) {
+ /* Zero any unwritten bytes at the end of the page */
+ size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
+ if (endoff)
+ zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ }
+
+ fuse_write_update_size(inode, pos + copied);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return copied;
+}
+
static int fuse_launder_page(struct page *page)
{
int err = 0;
@@ -1315,38 +2088,32 @@ static void fuse_vma_close(struct vm_area_struct *vma)
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
- /*
- * Don't use page->mapping as it may become NULL from a
- * concurrent truncate.
- */
- struct inode *inode = vma->vm_file->f_mapping->host;
+ struct inode *inode = file_inode(vma->vm_file);
+
+ file_update_time(vma->vm_file);
+ lock_page(page);
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ return VM_FAULT_NOPAGE;
+ }
fuse_wait_on_page_writeback(inode, page->index);
- return 0;
+ return VM_FAULT_LOCKED;
}
static const struct vm_operations_struct fuse_file_vm_ops = {
.close = fuse_vma_close,
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = fuse_page_mkwrite,
+ .remap_pages = generic_file_remap_pages,
};
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
- struct inode *inode = file->f_dentry->d_inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_file *ff = file->private_data;
- /*
- * file may be written through mmap, so chain it onto the
- * inodes's write_file list
- */
- spin_lock(&fc->lock);
- if (list_empty(&ff->write_entry))
- list_add(&ff->write_entry, &fi->write_files);
- spin_unlock(&fc->lock);
- }
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ fuse_link_write_file(file);
+
file_accessed(file);
vma->vm_ops = &fuse_file_vm_ops;
return 0;
@@ -1392,7 +2159,7 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
const struct file_lock *fl, int opcode, pid_t pid,
int flock)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_lk_in *arg = &req->misc.lk_in;
@@ -1414,13 +2181,13 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
static int fuse_getlk(struct file *file, struct file_lock *fl)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
struct fuse_lk_out outarg;
int err;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1439,14 +2206,14 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
- if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
/* NLM needs asynchronous locks, which we don't support yet */
return -ENOLCK;
}
@@ -1455,7 +2222,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
if (fl->fl_flags & FL_CLOSE)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1471,7 +2238,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
@@ -1494,15 +2261,17 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- if (fc->no_lock) {
+ if (fc->no_flock) {
err = flock_lock_file_wait(file, fl);
} else {
+ struct fuse_file *ff = file->private_data;
+
/* emulate flock with POSIX locks */
- fl->fl_owner = (fl_owner_t) file;
+ ff->flock = true;
err = fuse_setlk(file, fl, 1);
}
@@ -1521,7 +2290,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
if (!inode->i_sb->s_bdev || fc->no_bmap)
return 0;
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return 0;
@@ -1545,32 +2314,21 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
return err ? 0 : outarg.block;
}
-static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
{
loff_t retval;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
+
+ /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
+ if (whence == SEEK_CUR || whence == SEEK_SET)
+ return generic_file_llseek(file, offset, whence);
mutex_lock(&inode->i_mutex);
- switch (origin) {
- case SEEK_END:
- retval = fuse_update_attributes(inode, NULL, file, NULL);
- if (retval)
- goto exit;
- offset += i_size_read(inode);
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- }
- retval = -EINVAL;
- if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
- retval = offset;
- }
-exit:
+ retval = fuse_update_attributes(inode, NULL, file, NULL);
+ if (!retval)
+ retval = generic_file_llseek(file, offset, whence);
mutex_unlock(&inode->i_mutex);
+
return retval;
}
@@ -1583,7 +2341,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
if (!bytes)
return 0;
- iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+ iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
while (iov_iter_count(&ii)) {
struct page *page = pages[page_idx++];
@@ -1618,6 +2376,94 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
}
/*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit. Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
+ size_t transferred, unsigned count,
+ bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+ if (count * sizeof(struct compat_iovec) == transferred) {
+ struct compat_iovec *ciov = src;
+ unsigned i;
+
+ /*
+ * With this interface a 32bit server cannot support
+ * non-compat (i.e. ones coming from 64bit apps) ioctl
+ * requests
+ */
+ if (!is_compat)
+ return -EINVAL;
+
+ for (i = 0; i < count; i++) {
+ dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+ dst[i].iov_len = ciov[i].iov_len;
+ }
+ return 0;
+ }
+#endif
+
+ if (count * sizeof(struct iovec) != transferred)
+ return -EIO;
+
+ memcpy(dst, src, transferred);
+ return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+{
+ size_t n;
+ u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+ for (n = 0; n < count; n++, iov++) {
+ if (iov->iov_len > (size_t) max)
+ return -ENOMEM;
+ max -= iov->iov_len;
+ }
+ return 0;
+}
+
+static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
+ void *src, size_t transferred, unsigned count,
+ bool is_compat)
+{
+ unsigned i;
+ struct fuse_ioctl_iovec *fiov = src;
+
+ if (fc->minor < 16) {
+ return fuse_copy_ioctl_iovec_old(dst, src, transferred,
+ count, is_compat);
+ }
+
+ if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
+ return -EIO;
+
+ for (i = 0; i < count; i++) {
+ /* Did the server supply an inappropriate value? */
+ if (fiov[i].base != (unsigned long) fiov[i].base ||
+ fiov[i].len != (unsigned long) fiov[i].len)
+ return -EIO;
+
+ dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
+ dst[i].iov_len = (size_t) fiov[i].len;
+
+#ifdef CONFIG_COMPAT
+ if (is_compat &&
+ (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
+ (compat_size_t) dst[i].iov_len != fiov[i].len))
+ return -EIO;
+#endif
+ }
+
+ return 0;
+}
+
+
+/*
* For ioctls, there is no generic way to determine how much memory
* needs to be read and/or written. Furthermore, ioctls are allowed
* to dereference the passed pointer, so the parameter requires deep
@@ -1677,18 +2523,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct fuse_ioctl_out outarg;
struct fuse_req *req = NULL;
struct page **pages = NULL;
- struct page *iov_page = NULL;
+ struct iovec *iov_page = NULL;
struct iovec *in_iov = NULL, *out_iov = NULL;
unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
size_t in_size, out_size, transferred;
int err;
+#if BITS_PER_LONG == 32
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#else
+ if (flags & FUSE_IOCTL_COMPAT)
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#endif
+
/* assume all the iovs returned by client always fits in a page */
- BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
- pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
- iov_page = alloc_page(GFP_KERNEL);
+ pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
+ iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page)
goto out;
@@ -1697,7 +2550,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
* RETRY from server is not allowed.
*/
if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
- struct iovec *iov = page_address(iov_page);
+ struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
iov->iov_len = _IOC_SIZE(cmd);
@@ -1735,7 +2588,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
num_pages++;
}
- req = fuse_get_req(fc);
+ req = fuse_get_req(fc, num_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
req = NULL;
@@ -1743,6 +2596,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
}
memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
req->num_pages = num_pages;
+ fuse_page_descs_length_init(req, 0, req->num_pages);
/* okay, let's send it to the client */
req->in.h.opcode = FUSE_IOCTL;
@@ -1778,7 +2632,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* did it ask for retry? */
if (outarg.flags & FUSE_IOCTL_RETRY) {
- char *vaddr;
+ void *vaddr;
/* no retry if in restricted mode */
err = -EIO;
@@ -1798,18 +2652,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
- err = -EIO;
- if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
+ vaddr = kmap_atomic(pages[0]);
+ err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
+ transferred, in_iovs + out_iovs,
+ (flags & FUSE_IOCTL_COMPAT) != 0);
+ kunmap_atomic(vaddr);
+ if (err)
goto out;
- /* okay, copy in iovs and retry */
- vaddr = kmap_atomic(pages[0], KM_USER0);
- memcpy(page_address(iov_page), vaddr, transferred);
- kunmap_atomic(vaddr, KM_USER0);
-
- in_iov = page_address(iov_page);
+ in_iov = iov_page;
out_iov = in_iov + in_iovs;
+ err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+ if (err)
+ goto out;
+
+ err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+ if (err)
+ goto out;
+
goto retry;
}
@@ -1821,8 +2682,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
out:
if (req)
fuse_put_request(fc, req);
- if (iov_page)
- __free_page(iov_page);
+ free_page((unsigned long) iov_page);
while (num_pages)
__free_page(pages[--num_pages]);
kfree(pages);
@@ -1831,13 +2691,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
}
EXPORT_SYMBOL_GPL(fuse_do_ioctl);
-static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_task(fc, current))
+ if (!fuse_allow_current_process(fc))
return -EACCES;
if (is_bad_inode(inode))
@@ -1849,13 +2709,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
static long fuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_ioctl_common(file, cmd, arg, 0);
+ return fuse_ioctl_common(file, cmd, arg, 0);
}
static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+ return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
}
/*
@@ -1899,7 +2759,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
{
spin_lock(&fc->lock);
if (RB_EMPTY_NODE(&ff->polled_node)) {
- struct rb_node **link, *parent;
+ struct rb_node **link, *uninitialized_var(parent);
link = fuse_find_polled_node(fc, ff->kh, &parent);
BUG_ON(*link);
@@ -1922,6 +2782,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
return DEFAULT_POLLMASK;
poll_wait(file, &ff->poll_wait, wait);
+ inarg.events = (__u32)poll_requested_events(wait);
/*
* Ask for notification iff there's someone waiting for it.
@@ -1932,7 +2793,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
fuse_register_polled_file(fc, ff);
}
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return POLLERR;
@@ -1982,12 +2843,198 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
return 0;
}
+static void fuse_do_truncate(struct file *file)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct iattr attr;
+
+ attr.ia_valid = ATTR_SIZE;
+ attr.ia_size = i_size_read(inode);
+
+ attr.ia_file = file;
+ attr.ia_valid |= ATTR_FILE;
+
+ fuse_do_setattr(inode, &attr, file);
+}
+
+static inline loff_t fuse_round_up(loff_t off)
+{
+ return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+}
+
+static ssize_t
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
+{
+ ssize_t ret = 0;
+ struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
+ bool async_dio = ff->fc->async_dio;
+ loff_t pos = 0;
+ struct inode *inode;
+ loff_t i_size;
+ size_t count = iov_iter_count(iter);
+ struct fuse_io_priv *io;
+
+ pos = offset;
+ inode = file->f_mapping->host;
+ i_size = i_size_read(inode);
+
+ if ((rw == READ) && (offset > i_size))
+ return 0;
+
+ /* optimization for short read */
+ if (async_dio && rw != WRITE && offset + count > i_size) {
+ if (offset >= i_size)
+ return 0;
+ count = min_t(loff_t, count, fuse_round_up(i_size - offset));
+ iov_iter_truncate(iter, count);
+ }
+
+ io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
+ if (!io)
+ return -ENOMEM;
+ spin_lock_init(&io->lock);
+ io->reqs = 1;
+ io->bytes = -1;
+ io->size = 0;
+ io->offset = offset;
+ io->write = (rw == WRITE);
+ io->err = 0;
+ io->file = file;
+ /*
+ * By default, we want to optimize all I/Os with async request
+ * submission to the client filesystem if supported.
+ */
+ io->async = async_dio;
+ io->iocb = iocb;
+
+ /*
+ * We cannot asynchronously extend the size of a file. We have no method
+ * to wait on real async I/O requests, so we must submit this request
+ * synchronously.
+ */
+ if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
+ io->async = false;
+
+ if (rw == WRITE)
+ ret = __fuse_direct_write(io, iter, &pos);
+ else
+ ret = __fuse_direct_read(io, iter, &pos);
+
+ if (io->async) {
+ fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
+
+ /* we have a non-extending, async request, so return */
+ if (!is_sync_kiocb(iocb))
+ return -EIOCBQUEUED;
+
+ ret = wait_on_sync_kiocb(iocb);
+ } else {
+ kfree(io);
+ }
+
+ if (rw == WRITE) {
+ if (ret > 0)
+ fuse_write_update_size(inode, pos);
+ else if (ret < 0 && offset + count > i_size)
+ fuse_do_truncate(file);
+ }
+
+ return ret;
+}
+
+static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+ loff_t length)
+{
+ struct fuse_file *ff = file->private_data;
+ struct inode *inode = file->f_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_req *req;
+ struct fuse_fallocate_in inarg = {
+ .fh = ff->fh,
+ .offset = offset,
+ .length = length,
+ .mode = mode
+ };
+ int err;
+ bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
+ (mode & FALLOC_FL_PUNCH_HOLE);
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ if (fc->no_fallocate)
+ return -EOPNOTSUPP;
+
+ if (lock_inode) {
+ mutex_lock(&inode->i_mutex);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ loff_t endbyte = offset + length - 1;
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ offset, endbyte);
+ if (err)
+ goto out;
+
+ fuse_sync_writes(inode);
+ }
+ }
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ req = fuse_get_req_nopages(fc);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ req->in.h.opcode = FUSE_FALLOCATE;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ if (err == -ENOSYS) {
+ fc->no_fallocate = 1;
+ err = -EOPNOTSUPP;
+ }
+ fuse_put_request(fc, req);
+
+ if (err)
+ goto out;
+
+ /* we could have extended the file */
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+ bool changed = fuse_write_update_size(inode, offset + length);
+
+ if (changed && fc->writeback_cache)
+ file_update_time(file);
+ }
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ truncate_pagecache_range(inode, offset, offset + length - 1);
+
+ fuse_invalidate_attr(inode);
+
+out:
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ if (lock_inode)
+ mutex_unlock(&inode->i_mutex);
+
+ return err;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
- .read = do_sync_read,
- .aio_read = fuse_file_aio_read,
- .write = do_sync_write,
- .aio_write = fuse_file_aio_write,
+ .read = new_sync_read,
+ .read_iter = fuse_file_read_iter,
+ .write = new_sync_write,
+ .write_iter = fuse_file_write_iter,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
@@ -1999,6 +3046,7 @@ static const struct file_operations fuse_file_operations = {
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
+ .fallocate = fuse_file_fallocate,
};
static const struct file_operations fuse_direct_io_file_operations = {
@@ -2015,18 +3063,21 @@ static const struct file_operations fuse_direct_io_file_operations = {
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
+ .fallocate = fuse_file_fallocate,
/* no splice_read */
};
static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
.writepage = fuse_writepage,
+ .writepages = fuse_writepages,
.launder_page = fuse_launder_page,
- .write_begin = fuse_write_begin,
- .write_end = fuse_write_end,
.readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
+ .direct_IO = fuse_direct_IO,
+ .write_begin = fuse_write_begin,
+ .write_end = fuse_write_end,
};
void fuse_init_file_inode(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 57d4a3a0f10..e8e47a6ab51 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/poll.h>
+#include <linux/workqueue.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -43,6 +44,9 @@
doing the mount will be allowed to access the filesystem */
#define FUSE_ALLOW_OTHER (1 << 1)
+/** Number of page pointers embedded in fuse_req */
+#define FUSE_REQ_INLINE_PAGES 1
+
/** List of active connections */
extern struct list_head fuse_conn_list;
@@ -53,6 +57,12 @@ extern struct mutex fuse_mutex;
extern unsigned max_user_bgreq;
extern unsigned max_user_congthresh;
+/* One forget request */
+struct fuse_forget_link {
+ struct fuse_forget_one forget_one;
+ struct fuse_forget_link *next;
+};
+
/** FUSE inode */
struct fuse_inode {
/** Inode data */
@@ -66,14 +76,17 @@ struct fuse_inode {
u64 nlookup;
/** The request used for sending the FORGET message */
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
/** Time in jiffies until the file attributes are valid */
u64 i_time;
/** The sticky bit in inode->i_mode may have been removed, so
preserve the original mode */
- mode_t orig_i_mode;
+ umode_t orig_i_mode;
+
+ /** 64 bit inode number */
+ u64 orig_ino;
/** Version of last attribute change */
u64 attr_version;
@@ -93,6 +106,19 @@ struct fuse_inode {
/** List of writepage requestst (pending or sent) */
struct list_head writepages;
+
+ /** Miscellaneous bits describing inode state */
+ unsigned long state;
+};
+
+/** FUSE inode state bits */
+enum {
+ /** Advise readdirplus */
+ FUSE_I_ADVISE_RDPLUS,
+ /** Initialized with readdirplus */
+ FUSE_I_INIT_RDPLUS,
+ /** An operation changing file size is in progress */
+ FUSE_I_SIZE_UNSTABLE,
};
struct fuse_conn;
@@ -128,6 +154,9 @@ struct fuse_file {
/** Wait queue head for poll */
wait_queue_head_t poll_wait;
+
+ /** Has flock been performed on this file? */
+ bool flock:1;
};
/** One input argument of a request */
@@ -187,6 +216,12 @@ struct fuse_out {
struct fuse_arg args[3];
};
+/** FUSE page descriptor */
+struct fuse_page_desc {
+ unsigned int length;
+ unsigned int offset;
+};
+
/** The request state */
enum fuse_req_state {
FUSE_REQ_INIT = 0,
@@ -197,6 +232,20 @@ enum fuse_req_state {
FUSE_REQ_FINISHED
};
+/** The request IO state (for asynchronous processing) */
+struct fuse_io_priv {
+ int async;
+ spinlock_t lock;
+ unsigned reqs;
+ ssize_t bytes;
+ size_t size;
+ __u64 offset;
+ bool write;
+ int err;
+ struct kiocb *iocb;
+ struct file *file;
+};
+
/**
* A request to the client
*/
@@ -255,15 +304,16 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
- struct fuse_forget_in forget_in;
struct {
- struct fuse_release_in in;
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
struct path path;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
struct cuse_init_in cuse_init_in;
- struct cuse_init_out cuse_init_out;
struct {
struct fuse_read_in in;
u64 attr_ver;
@@ -271,26 +321,39 @@ struct fuse_req {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
+ struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
struct fuse_lk_in lk_in;
} misc;
/** page vector */
- struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+ struct page **pages;
+
+ /** page-descriptor vector */
+ struct fuse_page_desc *page_descs;
+
+ /** size of the 'pages' array */
+ unsigned max_pages;
+
+ /** inline page vector */
+ struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
+
+ /** inline page-descriptor vector */
+ struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
/** number of pages in vector */
unsigned num_pages;
- /** offset of data on first page */
- unsigned page_offset;
-
/** File used in the request (or NULL) */
struct fuse_file *ff;
/** Inode used in the request or NULL */
struct inode *inode;
+ /** AIO control block */
+ struct fuse_io_priv *io;
+
/** Link on fi->writepages */
struct list_head writepages_entry;
@@ -312,17 +375,16 @@ struct fuse_conn {
/** Lock protecting accessess to members of this structure */
spinlock_t lock;
- /** Mutex protecting against directory alias creation */
- struct mutex inst_mutex;
-
/** Refcount */
atomic_t count;
+ struct rcu_head rcu;
+
/** The user id for this mount */
- uid_t user_id;
+ kuid_t user_id;
/** The group id for this mount */
- gid_t group_id;
+ kgid_t group_id;
/** The fuse mount flags for this mount */
unsigned flags;
@@ -369,6 +431,17 @@ struct fuse_conn {
/** Pending interrupts */
struct list_head interrupts;
+ /** Queue of pending forgets */
+ struct fuse_forget_link forget_list_head;
+ struct fuse_forget_link *forget_list_tail;
+
+ /** Batching of FORGET requests (positive indicates FORGET batch) */
+ int forget_batch;
+
+ /** Flag indicating that INIT reply has been received. Allocating
+ * any fuse request will be suspended until the flag is set */
+ int initialized;
+
/** Flag indicating if connection is blocked. This will be
the case before the INIT reply is received, and if there
are too many outstading backgrounds requests */
@@ -407,11 +480,17 @@ struct fuse_conn {
/** Set if bdi is valid */
unsigned bdi_initialized:1;
+ /** write-back cache policy (default is write-through) */
+ unsigned writeback_cache:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
*/
+ /** Is open/release not implemented by fs? */
+ unsigned no_open:1;
+
/** Is fsync not implemented by fs? */
unsigned no_fsync:1;
@@ -433,7 +512,7 @@ struct fuse_conn {
/** Is removexattr not implemented by fs? */
unsigned no_removexattr:1;
- /** Are file locking primitives not implemented by fs? */
+ /** Are posix file locking primitives not implemented by fs? */
unsigned no_lock:1;
/** Is access not implemented by fs? */
@@ -457,6 +536,27 @@ struct fuse_conn {
/** Don't apply umask to creation modes */
unsigned dont_mask:1;
+ /** Are BSD file locking primitives not implemented by fs? */
+ unsigned no_flock:1;
+
+ /** Is fallocate not implemented by fs? */
+ unsigned no_fallocate:1;
+
+ /** Is rename with flags implemented by fs? */
+ unsigned no_rename2:1;
+
+ /** Use enhanced/automatic page cache invalidation. */
+ unsigned auto_inval_data:1;
+
+ /** Does the filesystem support readdirplus? */
+ unsigned do_readdirplus:1;
+
+ /** Does the filesystem want adaptive readdirplus? */
+ unsigned readdirplus_auto:1;
+
+ /** Does the filesystem support asynchronous direct-IO submission? */
+ unsigned async_dio:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -543,8 +643,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
/**
* Send FORGET command
*/
-void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- u64 nodeid, u64 nlookup);
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup);
+
+struct fuse_forget_link *fuse_alloc_forget(void);
+
+/* Used by READDIRPLUS */
+void fuse_force_forget(struct file *file, u64 nodeid);
/**
* Initialize READ or READDIR request
@@ -572,7 +677,8 @@ void fuse_release_common(struct file *file, int opcode);
/**
* Send FSYNC or FSYNCDIR request
*/
-int fuse_fsync_common(struct file *file, int datasync, int isdir);
+int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ int datasync, int isdir);
/**
* Notify poll wakeup
@@ -620,14 +726,14 @@ int fuse_dev_init(void);
void fuse_dev_cleanup(void);
int fuse_ctl_init(void);
-void fuse_ctl_cleanup(void);
+void __exit fuse_ctl_cleanup(void);
/**
* Allocate a request
*/
-struct fuse_req *fuse_request_alloc(void);
+struct fuse_req *fuse_request_alloc(unsigned npages);
-struct fuse_req *fuse_request_alloc_nofs(void);
+struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
/**
* Free a request
@@ -635,14 +741,32 @@ struct fuse_req *fuse_request_alloc_nofs(void);
void fuse_request_free(struct fuse_req *req);
/**
- * Get a request, may fail with -ENOMEM
+ * Get a request, may fail with -ENOMEM,
+ * caller should specify # elements in req->pages[] explicitly
*/
-struct fuse_req *fuse_get_req(struct fuse_conn *fc);
+struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
+struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
+ unsigned npages);
+
+/*
+ * Increment reference count on request
+ */
+void __fuse_get_request(struct fuse_req *req);
+
+/**
+ * Get a request, may fail with -ENOMEM,
+ * useful for callers who doesn't use req->pages[]
+ */
+static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
+{
+ return fuse_get_req(fc, 0);
+}
/**
* Gets a requests for a file operation, always succeeds
*/
-struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file);
+struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
+ struct file *file);
/**
* Decrement reference count of a request. If count goes to zero free
@@ -656,11 +780,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
/**
- * Send a request with no reply
- */
-void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
-
-/**
* Send a request in the background
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
@@ -678,6 +797,8 @@ void fuse_invalidate_attr(struct inode *inode);
void fuse_invalidate_entry_cache(struct dentry *entry);
+void fuse_invalidate_atime(struct inode *inode);
+
/**
* Acquire reference to fuse_conn
*/
@@ -711,9 +832,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
int fuse_valid_type(int m);
/**
- * Is task allowed to perform filesystem operation?
+ * Is current process allowed to perform filesystem operation?
*/
-int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
+int fuse_allow_current_process(struct fuse_conn *fc);
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
@@ -736,19 +857,44 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
/**
* File-system tells the kernel to invalidate parent attributes and
* the dentry matching parent/name.
+ *
+ * If the child_nodeid is non-zero and:
+ * - matches the inode number for the dentry matching parent/name,
+ * - is not a mount point
+ * - is a file or oan empty directory
+ * then the dentry is unhashed (d_delete()).
*/
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
- struct qstr *name);
+ u64 child_nodeid, struct qstr *name);
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
-ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write);
+
+/**
+ * fuse_direct_io() flags
+ */
+
+/** If set, it is WRITE; otherwise - READ */
+#define FUSE_DIO_WRITE (1 << 0)
+
+/** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */
+#define FUSE_DIO_CUSE (1 << 1)
+
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ loff_t *ppos, int flags);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags);
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags);
unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file);
-void fuse_write_update_size(struct inode *inode, loff_t pos);
+bool fuse_write_update_size(struct inode *inode, loff_t pos);
+
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
+
+int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+ struct file *file);
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a9..03246cd9d47 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
struct fuse_mount_data {
int fd;
unsigned rootmode;
- unsigned user_id;
- unsigned group_id;
+ kuid_t user_id;
+ kgid_t group_id;
unsigned fd_present:1;
unsigned rootmode_present:1;
unsigned user_id_present:1;
@@ -71,6 +71,11 @@ struct fuse_mount_data {
unsigned blksize;
};
+struct fuse_forget_link *fuse_alloc_forget(void)
+{
+ return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+}
+
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct inode *inode;
@@ -86,12 +91,14 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->nlookup = 0;
fi->attr_version = 0;
fi->writectr = 0;
+ fi->orig_ino = 0;
+ fi->state = 0;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
- fi->forget_req = fuse_request_alloc();
- if (!fi->forget_req) {
+ fi->forget = fuse_alloc_forget();
+ if (!fi->forget) {
kmem_cache_free(fuse_inode_cachep, inode);
return NULL;
}
@@ -99,49 +106,54 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
return inode;
}
-static void fuse_destroy_inode(struct inode *inode)
+static void fuse_i_callback(struct rcu_head *head)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!list_empty(&fi->write_files));
- BUG_ON(!list_empty(&fi->queued_writes));
- if (fi->forget_req)
- fuse_request_free(fi->forget_req);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(fuse_inode_cachep, inode);
}
-void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- u64 nodeid, u64 nlookup)
+static void fuse_destroy_inode(struct inode *inode)
{
- struct fuse_forget_in *inarg = &req->misc.forget_in;
- inarg->nlookup = nlookup;
- req->in.h.opcode = FUSE_FORGET;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(struct fuse_forget_in);
- req->in.args[0].value = inarg;
- fuse_request_send_noreply(fc, req);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ BUG_ON(!list_empty(&fi->write_files));
+ BUG_ON(!list_empty(&fi->queued_writes));
+ kfree(fi->forget);
+ call_rcu(&inode->i_rcu, fuse_i_callback);
}
static void fuse_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
if (inode->i_sb->s_flags & MS_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
- fi->forget_req = NULL;
+ fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+ fi->forget = NULL;
}
}
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
if (*flags & MS_MANDLOCK)
return -EINVAL;
return 0;
}
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+ ino_t ino = (ino_t) ino64;
+ if (sizeof(ino_t) < sizeof(u64))
+ ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+ return ino;
+}
+
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid)
{
@@ -151,18 +163,21 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->attr_version = ++fc->attr_version;
fi->i_time = attr_valid;
- inode->i_ino = attr->ino;
+ inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
- inode->i_nlink = attr->nlink;
- inode->i_uid = attr->uid;
- inode->i_gid = attr->gid;
+ set_nlink(inode, attr->nlink);
+ inode->i_uid = make_kuid(&init_user_ns, attr->uid);
+ inode->i_gid = make_kgid(&init_user_ns, attr->gid);
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
- inode->i_mtime.tv_sec = attr->mtime;
- inode->i_mtime.tv_nsec = attr->mtimensec;
- inode->i_ctime.tv_sec = attr->ctime;
- inode->i_ctime.tv_nsec = attr->ctimensec;
+ /* mtime from server may be stale due to local buffered write */
+ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+ }
if (attr->blksize != 0)
inode->i_blkbits = ilog2(attr->blksize);
@@ -177,6 +192,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->orig_i_mode = inode->i_mode;
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
inode->i_mode &= ~S_ISVTX;
+
+ fi->orig_ino = attr->ino;
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
@@ -184,23 +201,52 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ bool is_wb = fc->writeback_cache;
loff_t oldsize;
+ struct timespec old_mtime;
spin_lock(&fc->lock);
- if (attr_version != 0 && fi->attr_version > attr_version) {
+ if ((attr_version != 0 && fi->attr_version > attr_version) ||
+ test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
spin_unlock(&fc->lock);
return;
}
+ old_mtime = inode->i_mtime;
fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
- i_size_write(inode, attr->size);
+ /*
+ * In case of writeback_cache enabled, the cached writes beyond EOF
+ * extend local i_size without keeping userspace server in sync. So,
+ * attr->size coming from server can be stale. We cannot trust it.
+ */
+ if (!is_wb || !S_ISREG(inode->i_mode))
+ i_size_write(inode, attr->size);
spin_unlock(&fc->lock);
- if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
- truncate_pagecache(inode, oldsize, attr->size);
- invalidate_inode_pages2(inode->i_mapping);
+ if (!is_wb && S_ISREG(inode->i_mode)) {
+ bool inval = false;
+
+ if (oldsize != attr->size) {
+ truncate_pagecache(inode, attr->size);
+ inval = true;
+ } else if (fc->auto_inval_data) {
+ struct timespec new_mtime = {
+ .tv_sec = attr->mtime,
+ .tv_nsec = attr->mtimensec,
+ };
+
+ /*
+ * Auto inval mode also checks and invalidates if mtime
+ * has changed.
+ */
+ if (!timespec_equal(&old_mtime, &new_mtime))
+ inval = true;
+ }
+
+ if (inval)
+ invalidate_inode_pages2(inode->i_mapping);
}
}
@@ -208,6 +254,10 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
if (S_ISREG(inode->i_mode)) {
fuse_init_common(inode);
fuse_init_file_inode(inode);
@@ -254,7 +304,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return NULL;
if ((inode->i_state & I_NEW)) {
- inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_flags |= S_NOATIME;
+ if (!fc->writeback_cache || !S_ISREG(attr->mode))
+ inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation;
inode->i_data.backing_dev_info = &fc->bdi;
fuse_init_inode(inode, attr);
@@ -312,6 +364,7 @@ static void fuse_send_destroy(struct fuse_conn *fc)
fc->destroy_req = NULL;
req->in.h.opcode = FUSE_DESTROY;
req->force = 1;
+ req->background = 0;
fuse_request_send(fc, req);
fuse_put_request(fc, req);
}
@@ -328,17 +381,13 @@ void fuse_conn_kill(struct fuse_conn *fc)
spin_lock(&fc->lock);
fc->connected = 0;
fc->blocked = 0;
+ fc->initialized = 1;
spin_unlock(&fc->lock);
/* Flush all readers on this fs */
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
wake_up_all(&fc->reserved_req_waitq);
- mutex_lock(&fuse_mutex);
- list_del(&fc->entry);
- fuse_ctl_remove_conn(fc);
- mutex_unlock(&fuse_mutex);
- fuse_bdi_destroy(fc);
}
EXPORT_SYMBOL_GPL(fuse_conn_kill);
@@ -347,7 +396,14 @@ static void fuse_put_super(struct super_block *sb)
struct fuse_conn *fc = get_fuse_conn_super(sb);
fuse_send_destroy(fc);
+
fuse_conn_kill(fc);
+ mutex_lock(&fuse_mutex);
+ list_del(&fc->entry);
+ fuse_ctl_remove_conn(fc);
+ mutex_unlock(&fuse_mutex);
+ fuse_bdi_destroy(fc);
+
fuse_conn_put(fc);
}
@@ -373,12 +429,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
struct fuse_statfs_out outarg;
int err;
- if (!fuse_allow_task(fc, current)) {
+ if (!fuse_allow_current_process(fc)) {
buf->f_type = FUSE_SUPER_MAGIC;
return 0;
}
- req = fuse_get_req(fc);
+ req = fuse_get_req_nopages(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -422,6 +478,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -432,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -455,16 +523,20 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
+ return 0;
+ d->user_id = make_kuid(current_user_ns(), uv);
+ if (!uid_valid(d->user_id))
return 0;
- d->user_id = value;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
+ return 0;
+ d->group_id = make_kgid(current_user_ns(), uv);
+ if (!gid_valid(d->group_id))
return 0;
- d->group_id = value;
d->group_id_present = 1;
break;
@@ -500,21 +572,21 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
return 1;
}
-static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
- struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+ struct super_block *sb = root->d_sb;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
- seq_printf(m, ",user_id=%u", fc->user_id);
- seq_printf(m, ",group_id=%u", fc->group_id);
+ seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
+ seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
seq_puts(m, ",default_permissions");
if (fc->flags & FUSE_ALLOW_OTHER)
seq_puts(m, ",allow_other");
if (fc->max_read != ~0)
seq_printf(m, ",max_read=%u", fc->max_read);
- if (mnt->mnt_sb->s_bdev &&
- mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
- seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
+ if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
+ seq_printf(m, ",blksize=%lu", sb->s_blocksize);
return 0;
}
@@ -522,7 +594,6 @@ void fuse_conn_init(struct fuse_conn *fc)
{
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
- mutex_init(&fc->inst_mutex);
init_rwsem(&fc->killsb);
atomic_set(&fc->count, 1);
init_waitqueue_head(&fc->waitq);
@@ -534,13 +605,15 @@ void fuse_conn_init(struct fuse_conn *fc)
INIT_LIST_HEAD(&fc->interrupts);
INIT_LIST_HEAD(&fc->bg_queue);
INIT_LIST_HEAD(&fc->entry);
+ fc->forget_list_tail = &fc->forget_list_head;
atomic_set(&fc->num_waiting, 0);
fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
fc->khctr = 0;
fc->polled_files = RB_ROOT;
fc->reqctr = 0;
- fc->blocked = 1;
+ fc->blocked = 0;
+ fc->initialized = 0;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
}
@@ -551,7 +624,6 @@ void fuse_conn_put(struct fuse_conn *fc)
if (atomic_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
- mutex_destroy(&fc->inst_mutex);
fc->release(fc);
}
}
@@ -618,10 +690,8 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
goto out_iput;
entry = d_obtain_alias(inode);
- if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
- entry->d_op = &fuse_dentry_operations;
+ if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
fuse_invalidate_entry_cache(entry);
- }
return entry;
@@ -631,17 +701,17 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
return ERR_PTR(err);
}
-static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
- int connectable)
+static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ struct inode *parent)
{
- struct inode *inode = dentry->d_inode;
- bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
- int len = encode_parent ? 6 : 3;
+ int len = parent ? 6 : 3;
u64 nodeid;
u32 generation;
- if (*max_len < len)
- return 255;
+ if (*max_len < len) {
+ *max_len = len;
+ return FILEID_INVALID;
+ }
nodeid = get_fuse_inode(inode)->nodeid;
generation = inode->i_generation;
@@ -650,14 +720,9 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
fh[1] = (u32)(nodeid & 0xffffffff);
fh[2] = generation;
- if (encode_parent) {
- struct inode *parent;
-
- spin_lock(&dentry->d_lock);
- parent = dentry->d_parent->d_inode;
+ if (parent) {
nodeid = get_fuse_inode(parent)->nodeid;
generation = parent->i_generation;
- spin_unlock(&dentry->d_lock);
fh[3] = (u32)(nodeid >> 32);
fh[4] = (u32)(nodeid & 0xffffffff);
@@ -665,7 +730,7 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
}
*max_len = len;
- return encode_parent ? 0x82 : 0x81;
+ return parent ? 0x82 : 0x81;
}
static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
@@ -720,10 +785,8 @@ static struct dentry *fuse_get_parent(struct dentry *child)
}
parent = d_obtain_alias(inode);
- if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
- parent->d_op = &fuse_dentry_operations;
+ if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
fuse_invalidate_entry_cache(parent);
- }
return parent;
}
@@ -739,6 +802,7 @@ static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
.evict_inode = fuse_evict_inode,
+ .write_inode = fuse_write_inode,
.drop_inode = generic_delete_inode,
.remount_fs = fuse_remount_fs,
.put_super = fuse_put_super,
@@ -750,7 +814,7 @@ static const struct super_operations fuse_super_operations = {
static void sanitize_global_limit(unsigned *limit)
{
if (*limit == 0)
- *limit = ((num_physpages << PAGE_SHIFT) >> 13) /
+ *limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
sizeof(struct fuse_req);
if (*limit >= 1 << 16)
@@ -812,6 +876,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_read = 1;
if (!(arg->flags & FUSE_POSIX_LOCKS))
fc->no_lock = 1;
+ if (arg->minor >= 17) {
+ if (!(arg->flags & FUSE_FLOCK_LOCKS))
+ fc->no_flock = 1;
+ } else {
+ if (!(arg->flags & FUSE_POSIX_LOCKS))
+ fc->no_flock = 1;
+ }
if (arg->flags & FUSE_ATOMIC_O_TRUNC)
fc->atomic_o_trunc = 1;
if (arg->minor >= 9) {
@@ -823,9 +894,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->big_writes = 1;
if (arg->flags & FUSE_DONT_MASK)
fc->dont_mask = 1;
+ if (arg->flags & FUSE_AUTO_INVAL_DATA)
+ fc->auto_inval_data = 1;
+ if (arg->flags & FUSE_DO_READDIRPLUS) {
+ fc->do_readdirplus = 1;
+ if (arg->flags & FUSE_READDIRPLUS_AUTO)
+ fc->readdirplus_auto = 1;
+ }
+ if (arg->flags & FUSE_ASYNC_DIO)
+ fc->async_dio = 1;
+ if (arg->flags & FUSE_WRITEBACK_CACHE)
+ fc->writeback_cache = 1;
+ if (arg->time_gran && arg->time_gran <= 1000000000)
+ fc->sb->s_time_gran = arg->time_gran;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
+ fc->no_flock = 1;
}
fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
@@ -834,7 +919,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->max_write = max_t(unsigned, 4096, fc->max_write);
fc->conn_init = 1;
}
- fc->blocked = 0;
+ fc->initialized = 1;
wake_up_all(&fc->blocked_waitq);
}
@@ -846,7 +931,11 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
- FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+ FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+ FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -864,7 +953,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_free_conn(struct fuse_conn *fc)
{
- kfree(fc);
+ kfree_rcu(fc, rcu);
}
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
@@ -873,9 +962,8 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
fc->bdi.name = "fuse";
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
- fc->bdi.unplug_io_fn = default_unplug_io_fn;
/* fuse does it's own writeback accounting */
- fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
err = bdi_init(&fc->bdi);
if (err)
@@ -925,7 +1013,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_flags & MS_MANDLOCK)
goto err;
- if (!parse_fuse_opt((char *) data, &d, is_bdev))
+ sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
+
+ if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
if (is_bdev) {
@@ -941,6 +1031,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_time_gran = 1;
sb->s_export_op = &fuse_export_operations;
file = fget(d.fd);
@@ -948,7 +1039,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!file)
goto err;
- if (file->f_op != &fuse_dev_operations)
+ if ((file->f_op != &fuse_dev_operations) ||
+ (file->f_cred->user_ns != &init_user_ns))
goto err_fput;
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
@@ -982,21 +1074,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
err = -ENOMEM;
root = fuse_get_root_inode(sb, d.rootmode);
- if (!root)
+ root_dentry = d_make_root(root);
+ if (!root_dentry)
goto err_put_conn;
+ /* only now - we want root dentry with NULL ->d_op */
+ sb->s_d_op = &fuse_dentry_operations;
- root_dentry = d_alloc_root(root);
- if (!root_dentry) {
- iput(root);
- goto err_put_conn;
- }
-
- init_req = fuse_request_alloc();
+ init_req = fuse_request_alloc(0);
if (!init_req)
goto err_put_root;
+ init_req->background = 1;
if (is_bdev) {
- fc->destroy_req = fuse_request_alloc();
+ fc->destroy_req = fuse_request_alloc(0);
if (!fc->destroy_req)
goto err_free_init_req;
}
@@ -1068,6 +1158,7 @@ static struct file_system_type fuse_fs_type = {
.mount = fuse_mount,
.kill_sb = fuse_kill_sb_anon,
};
+MODULE_ALIAS_FS("fuse");
#ifdef CONFIG_BLOCK
static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
@@ -1097,6 +1188,7 @@ static struct file_system_type fuseblk_fs_type = {
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
+MODULE_ALIAS_FS("fuseblk");
static inline int register_fuseblk(void)
{
@@ -1129,28 +1221,28 @@ static int __init fuse_fs_init(void)
{
int err;
- err = register_filesystem(&fuse_fs_type);
- if (err)
- goto out;
-
- err = register_fuseblk();
- if (err)
- goto out_unreg;
-
fuse_inode_cachep = kmem_cache_create("fuse_inode",
sizeof(struct fuse_inode),
0, SLAB_HWCACHE_ALIGN,
fuse_inode_init_once);
err = -ENOMEM;
if (!fuse_inode_cachep)
- goto out_unreg2;
+ goto out;
+
+ err = register_fuseblk();
+ if (err)
+ goto out2;
+
+ err = register_filesystem(&fuse_fs_type);
+ if (err)
+ goto out3;
return 0;
- out_unreg2:
+ out3:
unregister_fuseblk();
- out_unreg:
- unregister_filesystem(&fuse_fs_type);
+ out2:
+ kmem_cache_destroy(fuse_inode_cachep);
out:
return err;
}
@@ -1159,6 +1251,12 @@ static void fuse_fs_cleanup(void)
{
unregister_filesystem(&fuse_fs_type);
unregister_fuseblk();
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(fuse_inode_cachep);
}