From 0b05b18381eea98c9c9ada95629bf659a88c9374 Mon Sep 17 00:00:00 2001 From: "Anand V. Avati" Date: Sun, 19 Aug 2012 08:53:23 -0400 Subject: fuse: implement NFS-like readdirplus support This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains 'fuse_entry_out' structure thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already existed (for e.g. in a re-run of ls -l) then just the inode attributes timeout and dentry timeout are refreshed. With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant as it avoided 20,000 upcalls calls (lookup). Cache consistency is no worse than what already is. Signed-off-by: Anand V. Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 19 +++++++ fs/fuse/dir.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/fuse/fuse_i.h | 6 +++ fs/fuse/inode.c | 5 +- 4 files changed, 185 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e83351aa5ba..05c3eec298f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fuse_request_send_nowait_locked(fc, req); } +void fuse_force_forget(struct file *file, u64 nodeid) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_forget_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.nlookup = 1; + req = fuse_get_req_nofail(fc, file); + req->in.h.opcode = FUSE_FORGET; + req->in.h.nodeid = nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->isreply = 0; + fuse_request_send_nowait(fc, req); +} + /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40..dcc1e522c7d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) +{ + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' && name.len == 2) + return 0; + } + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry && dentry->d_inode) { + inode = dentry->d_inode; + if (get_node_id(inode) == o->nodeid) { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + err = d_invalidate(dentry); + if (err) + goto out; + dput(dentry); + dentry = NULL; + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), + attr_version); + + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + if (dentry) + dput(dentry); + return err; +} + +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir, u64 attr_version) +{ + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. + */ + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, + dirent->type); + file->f_pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); + if (fc->do_readdirplus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIR); + } fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (fc->do_readdirplus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, dstbuf, filldir, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + dstbuf, filldir); + } + } __free_page(page); fuse_invalidate_attr(inode); /* atime changed */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105a53fc72..5c5055306d3 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -487,6 +487,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Does the filesystem support readdir-plus? */ + unsigned do_readdirplus:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, struct fuse_forget_link *fuse_alloc_forget(void); +/* Used by READDIRPLUS */ +void fuse_force_forget(struct file *file, u64 nodeid); + /** * Initialize READ or READDIR request */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73ca6b72bea..6f7d5746bf5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + if (arg->flags & FUSE_DO_READDIRPLUS) + fc->do_readdirplus = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); -- cgit v1.2.3-18-g5258 From 4250c0668ea10a19f3d37b1733f54ce6c8a37234 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:07 +0400 Subject: fuse: general infrastructure for pages[] of variable size The patch removes inline array of FUSE_MAX_PAGES_PER_REQ page pointers from fuse_req. Instead of that, req->pages may now point either to small inline array or to an array allocated dynamically. This essentially means that all callers of fuse_request_alloc[_nofs] should pass the number of pages needed explicitly. The patch doesn't make any logic changes. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 47 ++++++++++++++++++++++++++++++++++------------- fs/fuse/file.c | 4 ++-- fs/fuse/fuse_i.h | 15 ++++++++++++--- fs/fuse/inode.c | 4 ++-- 4 files changed, 50 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 05c3eec298f..af37ae13825 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -34,34 +34,55 @@ static struct fuse_conn *fuse_get_conn(struct file *file) return file->private_data; } -static void fuse_request_init(struct fuse_req *req) +static void fuse_request_init(struct fuse_req *req, struct page **pages, + unsigned npages) { memset(req, 0, sizeof(*req)); + memset(pages, 0, sizeof(*pages) * npages); INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); + req->pages = pages; + req->max_pages = npages; } -struct fuse_req *fuse_request_alloc(void) +static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); - if (req) - fuse_request_init(req); + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags); + if (req) { + struct page **pages; + + if (npages <= FUSE_REQ_INLINE_PAGES) + pages = req->inline_pages; + else + pages = kmalloc(sizeof(struct page *) * npages, flags); + + if (!pages) { + kmem_cache_free(fuse_req_cachep, req); + return NULL; + } + + fuse_request_init(req, pages, npages); + } return req; } + +struct fuse_req *fuse_request_alloc(unsigned npages) +{ + return __fuse_request_alloc(npages, GFP_KERNEL); +} EXPORT_SYMBOL_GPL(fuse_request_alloc); -struct fuse_req *fuse_request_alloc_nofs(void) +struct fuse_req *fuse_request_alloc_nofs(unsigned npages) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); - if (req) - fuse_request_init(req); - return req; + return __fuse_request_alloc(npages, GFP_NOFS); } void fuse_request_free(struct fuse_req *req) { + if (req->pages != req->inline_pages) + kfree(req->pages); kmem_cache_free(fuse_req_cachep, req); } @@ -116,7 +137,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (!fc->connected) goto out; - req = fuse_request_alloc(); + req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); err = -ENOMEM; if (!req) goto out; @@ -165,7 +186,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_file *ff = file->private_data; spin_lock(&fc->lock); - fuse_request_init(req); + fuse_request_init(req, req->pages, req->max_pages); BUG_ON(ff->reserved_req); ff->reserved_req = req; wake_up_all(&fc->reserved_req_waitq); @@ -192,7 +213,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) atomic_inc(&fc->num_waiting); wait_event(fc->blocked_waitq, !fc->blocked); - req = fuse_request_alloc(); + req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); if (!req) req = get_reserved_req(fc, file); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f3ab824fa30..2565f635c04 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) return NULL; ff->fc = fc; - ff->reserved_req = fuse_request_alloc(); + ff->reserved_req = fuse_request_alloc(0); if (unlikely(!ff->reserved_req)) { kfree(ff); return NULL; @@ -1272,7 +1272,7 @@ static int fuse_writepage_locked(struct page *page) set_page_writeback(page); - req = fuse_request_alloc_nofs(); + req = fuse_request_alloc_nofs(1); if (!req) goto err; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5c5055306d3..0c5b9310f93 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -44,6 +44,9 @@ doing the mount will be allowed to access the filesystem */ #define FUSE_ALLOW_OTHER (1 << 1) +/** Number of page pointers embedded in fuse_req */ +#define FUSE_REQ_INLINE_PAGES 1 + /** List of active connections */ extern struct list_head fuse_conn_list; @@ -291,7 +294,13 @@ struct fuse_req { } misc; /** page vector */ - struct page *pages[FUSE_MAX_PAGES_PER_REQ]; + struct page **pages; + + /** size of the 'pages' array */ + unsigned max_pages; + + /** inline page vector */ + struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; /** number of pages in vector */ unsigned num_pages; @@ -664,9 +673,9 @@ void fuse_ctl_cleanup(void); /** * Allocate a request */ -struct fuse_req *fuse_request_alloc(void); +struct fuse_req *fuse_request_alloc(unsigned npages); -struct fuse_req *fuse_request_alloc_nofs(void); +struct fuse_req *fuse_request_alloc_nofs(unsigned npages); /** * Free a request diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6f7d5746bf5..9a937f0239e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1037,12 +1037,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; - init_req = fuse_request_alloc(); + init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; if (is_bdev) { - fc->destroy_req = fuse_request_alloc(); + fc->destroy_req = fuse_request_alloc(0); if (!fc->destroy_req) goto err_free_init_req; } -- cgit v1.2.3-18-g5258 From b111c8c0e3e5e780ae0758fc4c1c376a7c9d5997 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:30 +0400 Subject: fuse: categorize fuse_get_req() The patch categorizes all fuse_get_req() invocations into two categories: - fuse_get_req_nopages(fc) - when caller doesn't care about req->pages - fuse_get_req(fc, n) - when caller need n page pointers (n > 0) Adding fuse_get_req_nopages() helps to avoid numerous fuse_get_req(fc, 0) scattered over code. Now it's clear from the first glance when a caller need fuse_req with page pointers. The patch doesn't make any logic changes. In multi-page case, it silly allocates array of FUSE_MAX_PAGES_PER_REQ page pointers. This will be amended by future patches. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 2 +- fs/fuse/dev.c | 13 +++++++------ fs/fuse/dir.c | 38 +++++++++++++++++++------------------- fs/fuse/file.c | 30 +++++++++++++++--------------- fs/fuse/fuse_i.h | 17 ++++++++++++++--- fs/fuse/inode.c | 2 +- 6 files changed, 57 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e397b675b02..5cc838f320d 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -419,7 +419,7 @@ static int cuse_send_init(struct cuse_conn *cc) BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); if (IS_ERR(req)) { rc = PTR_ERR(req); goto err; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index af37ae13825..ff5e8bed5d8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -118,7 +118,7 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } -struct fuse_req *fuse_get_req(struct fuse_conn *fc) +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) { struct fuse_req *req; sigset_t oldset; @@ -137,7 +137,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (!fc->connected) goto out; - req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); + req = fuse_request_alloc(npages); err = -ENOMEM; if (!req) goto out; @@ -207,13 +207,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) * filesystem should not have it's own file open. If deadlock is * intentional, it can still be broken by "aborting" the filesystem. */ -struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) +struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, + struct file *file) { struct fuse_req *req; atomic_inc(&fc->num_waiting); wait_event(fc->blocked_waitq, !fc->blocked); - req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); + req = fuse_request_alloc(0); if (!req) req = get_reserved_req(fc, file); @@ -521,7 +522,7 @@ void fuse_force_forget(struct file *file, u64 nodeid) memset(&inarg, 0, sizeof(inarg)); inarg.nlookup = 1; - req = fuse_get_req_nofail(fc, file); + req = fuse_get_req_nofail_nopages(fc, file); req->in.h.opcode = FUSE_FORGET; req->in.h.nodeid = nodeid; req->in.numargs = 1; @@ -1577,7 +1578,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, unsigned int offset; size_t total_len = 0; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dcc1e522c7d..d04bcc5cccd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -178,7 +178,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) return -ECHILD; fc = get_fuse_conn(inode); - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return 0; @@ -271,7 +271,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (name->len > FUSE_NAME_MAX) goto out; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); err = PTR_ERR(req); if (IS_ERR(req)) goto out; @@ -391,7 +391,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!forget) goto out_err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); err = PTR_ERR(req); if (IS_ERR(req)) goto out_put_forget_req; @@ -592,7 +592,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, { struct fuse_mknod_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -623,7 +623,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -647,7 +647,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -664,7 +664,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -696,7 +696,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -723,7 +723,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, int err; struct fuse_rename_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -776,7 +776,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_link_in inarg; struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -848,7 +848,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, struct fuse_req *req; u64 attr_version; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1029,7 +1029,7 @@ static int fuse_access(struct inode *inode, int mask) if (fc->no_access) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1305,7 +1305,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) if (is_bad_inode(inode)) return -EIO; - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); if (IS_ERR(req)) return PTR_ERR(req); @@ -1349,7 +1349,7 @@ static char *read_link(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); char *link; if (IS_ERR(req)) @@ -1562,7 +1562,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1670,7 +1670,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name, if (fc->no_setxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1709,7 +1709,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, if (fc->no_getxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1761,7 +1761,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) if (fc->no_listxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1806,7 +1806,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name) if (fc->no_removexattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2565f635c04..882877125c1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, struct fuse_req *req; int err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fc->no_flush) return 0; - req = fuse_get_req_nofail(fc, file); + req = fuse_get_req_nofail_nopages(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.lock_owner = fuse_lock_owner_id(fc, id); @@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, fuse_sync_writes(inode); - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page) */ fuse_wait_on_page_writeback(inode, page->index); - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); err = PTR_ERR(req); if (IS_ERR(req)) goto out; @@ -657,7 +657,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc); + data->req = req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); @@ -683,7 +683,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc); + data.req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); err = PTR_ERR(data.req); if (IS_ERR(data.req)) goto out; @@ -890,7 +890,7 @@ static ssize_t fuse_perform_write(struct file *file, struct fuse_req *req; ssize_t count; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { err = PTR_ERR(req); break; @@ -1072,7 +1072,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, ssize_t res = 0; struct fuse_req *req; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) return PTR_ERR(req); @@ -1108,7 +1108,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) break; } @@ -1471,7 +1471,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) struct fuse_lk_out outarg; int err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1506,7 +1506,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) if (fl->fl_flags & FL_CLOSE) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1575,7 +1575,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) if (!inode->i_sb->s_bdev || fc->no_bmap) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return 0; @@ -1873,7 +1873,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, num_pages++; } - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; @@ -2076,7 +2076,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) fuse_register_polled_file(fc, ff); } - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return POLLERR; @@ -2194,7 +2194,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fc->no_fallocate) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0c5b9310f93..5b21e6ab9e7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -683,14 +683,25 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages); void fuse_request_free(struct fuse_req *req); /** - * Get a request, may fail with -ENOMEM + * Get a request, may fail with -ENOMEM, + * caller should specify # elements in req->pages[] explicitly */ -struct fuse_req *fuse_get_req(struct fuse_conn *fc); +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); + +/** + * Get a request, may fail with -ENOMEM, + * useful for callers who doesn't use req->pages[] + */ +static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc) +{ + return fuse_get_req(fc, 0); +} /** * Gets a requests for a file operation, always succeeds */ -struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); +struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, + struct file *file); /** * Decrement reference count of a request. If count goes to zero free diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a937f0239e..9d95a5a3d55 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -413,7 +413,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.2.3-18-g5258 From 4d53dc99baf139e4fa0d395f7658032cc2eb3297 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:42 +0400 Subject: fuse: rework fuse_retrieve() The patch reworks fuse_retrieve() to allocate only so many page pointers as needed. The core part of the patch is the following calculation: num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; (thanks Miklos for formula). All other changes are mostly shuffling lines. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ff5e8bed5d8..28d9792de4a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1577,13 +1577,24 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, unsigned int num; unsigned int offset; size_t total_len = 0; + int num_pages; - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + offset = outarg->offset & ~PAGE_CACHE_MASK; + file_size = i_size_read(inode); + + num = outarg->size; + if (outarg->offset > file_size) + num = 0; + else if (outarg->offset + num > file_size) + num = file_size - outarg->offset; + + num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ); + + req = fuse_get_req(fc, num_pages); if (IS_ERR(req)) return PTR_ERR(req); - offset = outarg->offset & ~PAGE_CACHE_MASK; - req->in.h.opcode = FUSE_NOTIFY_REPLY; req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; @@ -1592,14 +1603,8 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_CACHE_SHIFT; - file_size = i_size_read(inode); - num = outarg->size; - if (outarg->offset > file_size) - num = 0; - else if (outarg->offset + num > file_size) - num = file_size - outarg->offset; - while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + while (num && req->num_pages < num_pages) { struct page *page; unsigned int this_num; -- cgit v1.2.3-18-g5258 From f8dbdf81821b5ab4c5e86e7b2bd7edb892c159c2 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:51 +0400 Subject: fuse: rework fuse_readpages() The patch uses 'nr_pages' argument of fuse_readpages() as heuristics for the number of page pointers to allocate. This can be improved further by taking in consideration fc->max_read and gaps between page indices, but it's not clear whether it's worthy or not. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 882877125c1..5fd06bae179 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -641,6 +641,7 @@ struct fuse_fill_data { struct fuse_req *req; struct file *file; struct inode *inode; + unsigned nr_pages; }; static int fuse_readpages_fill(void *_data, struct page *page) @@ -656,16 +657,25 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { + int nr_alloc = min_t(unsigned, data->nr_pages, + FUSE_MAX_PAGES_PER_REQ); fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + data->req = req = fuse_get_req(fc, nr_alloc); if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); } } + + if (WARN_ON(req->num_pages >= req->max_pages)) { + fuse_put_request(fc, req); + return -EIO; + } + page_cache_get(page); req->pages[req->num_pages] = page; req->num_pages++; + data->nr_pages--; return 0; } @@ -676,6 +686,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_fill_data data; int err; + int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ); err = -EIO; if (is_bad_inode(inode)) @@ -683,7 +694,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + data.req = fuse_get_req(fc, nr_alloc); + data.nr_pages = nr_pages; err = PTR_ERR(data.req); if (IS_ERR(data.req)) goto out; -- cgit v1.2.3-18-g5258 From d07f09f509fb21482096e1975f160b694c0edf84 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:00 +0400 Subject: fuse: rework fuse_perform_write() The patch allocates as many page pointers in fuse_req as needed to cover interval [pos .. pos+len-1]. Inline helper fuse_wr_pages() is introduced to hide this cumbersome arithmetic. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5fd06bae179..b9972502f43 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -881,11 +881,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!fc->big_writes) break; } while (iov_iter_count(ii) && count < fc->max_write && - req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); + req->num_pages < req->max_pages && offset == 0); return count > 0 ? count : err; } +static inline unsigned fuse_wr_pages(loff_t pos, size_t len) +{ + return min_t(unsigned, + ((pos + len - 1) >> PAGE_CACHE_SHIFT) - + (pos >> PAGE_CACHE_SHIFT) + 1, + FUSE_MAX_PAGES_PER_REQ); +} + static ssize_t fuse_perform_write(struct file *file, struct address_space *mapping, struct iov_iter *ii, loff_t pos) @@ -901,8 +909,9 @@ static ssize_t fuse_perform_write(struct file *file, do { struct fuse_req *req; ssize_t count; + unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii)); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, nr_pages); if (IS_ERR(req)) { err = PTR_ERR(req); break; -- cgit v1.2.3-18-g5258 From 54b966702dafe396b6f4e609f222b8e0fdb4d7a4 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:13 +0400 Subject: fuse: rework fuse_do_ioctl() fuse_do_ioctl() already calculates the number of pages it's going to use. It is stored in 'num_pages' variable. So the patch simply uses it for allocating fuse_req. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b9972502f43..fdb5b33198a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1894,7 +1894,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, num_pages++; } - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, num_pages); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; -- cgit v1.2.3-18-g5258 From b2430d7567a376b3685627ca7e9d712f6f27d49b Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:24 +0400 Subject: fuse: add per-page descriptor to fuse_req The ability to save page pointers along with lengths and offsets in fuse_req will be useful to cover several iovec-s with a single fuse_req. Per-request page_offset is removed because anybody who need it can use req->page_descs[0].offset instead. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 28 ++++++++++++++++++++-------- fs/fuse/file.c | 10 +++++----- fs/fuse/fuse_i.h | 15 ++++++++++++--- 3 files changed, 37 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 28d9792de4a..db4af8f3886 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -35,15 +35,18 @@ static struct fuse_conn *fuse_get_conn(struct file *file) } static void fuse_request_init(struct fuse_req *req, struct page **pages, + struct fuse_page_desc *page_descs, unsigned npages) { memset(req, 0, sizeof(*req)); memset(pages, 0, sizeof(*pages) * npages); + memset(page_descs, 0, sizeof(*page_descs) * npages); INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); req->pages = pages; + req->page_descs = page_descs; req->max_pages = npages; } @@ -52,18 +55,25 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags); if (req) { struct page **pages; + struct fuse_page_desc *page_descs; - if (npages <= FUSE_REQ_INLINE_PAGES) + if (npages <= FUSE_REQ_INLINE_PAGES) { pages = req->inline_pages; - else + page_descs = req->inline_page_descs; + } else { pages = kmalloc(sizeof(struct page *) * npages, flags); + page_descs = kmalloc(sizeof(struct fuse_page_desc) * + npages, flags); + } - if (!pages) { + if (!pages || !page_descs) { + kfree(pages); + kfree(page_descs); kmem_cache_free(fuse_req_cachep, req); return NULL; } - fuse_request_init(req, pages, npages); + fuse_request_init(req, pages, page_descs, npages); } return req; } @@ -81,8 +91,10 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages) void fuse_request_free(struct fuse_req *req) { - if (req->pages != req->inline_pages) + if (req->pages != req->inline_pages) { kfree(req->pages); + kfree(req->page_descs); + } kmem_cache_free(fuse_req_cachep, req); } @@ -186,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_file *ff = file->private_data; spin_lock(&fc->lock); - fuse_request_init(req, req->pages, req->max_pages); + fuse_request_init(req, req->pages, req->page_descs, req->max_pages); BUG_ON(ff->reserved_req); ff->reserved_req = req; wake_up_all(&fc->reserved_req_waitq); @@ -891,7 +903,7 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, { unsigned i; struct fuse_req *req = cs->req; - unsigned offset = req->page_offset; + unsigned offset = req->page_descs[0].offset; unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { @@ -1599,7 +1611,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_offset = offset; + req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_CACHE_SHIFT; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fdb5b33198a..2b6f08ac62c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -798,7 +798,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, res = fuse_send_write(req, file, pos, count, NULL); - offset = req->page_offset; + offset = req->page_descs[0].offset; count = res; for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; @@ -829,7 +829,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, int err; req->in.argpages = 1; - req->page_offset = offset; + req->page_descs[0].offset = offset; do { size_t tmp; @@ -1070,14 +1070,14 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return npages; req->num_pages = npages; - req->page_offset = offset; + req->page_descs[0].offset = offset; if (write) req->in.argpages = 1; else req->out.argpages = 1; - nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; + nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; *nbytesp = min(*nbytesp, nbytes); return 0; @@ -1314,7 +1314,7 @@ static int fuse_writepage_locked(struct page *page) req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; - req->page_offset = 0; + req->page_descs[0].offset = 0; req->end = fuse_writepage_end; req->inode = inode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5b21e6ab9e7..70cef60afe0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -203,6 +203,12 @@ struct fuse_out { struct fuse_arg args[3]; }; +/** FUSE page descriptor */ +struct fuse_page_desc { + unsigned int length; + unsigned int offset; +}; + /** The request state */ enum fuse_req_state { FUSE_REQ_INIT = 0, @@ -296,18 +302,21 @@ struct fuse_req { /** page vector */ struct page **pages; + /** page-descriptor vector */ + struct fuse_page_desc *page_descs; + /** size of the 'pages' array */ unsigned max_pages; /** inline page vector */ struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; + /** inline page-descriptor vector */ + struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES]; + /** number of pages in vector */ unsigned num_pages; - /** offset of data on first page */ - unsigned page_offset; - /** File used in the request (or NULL) */ struct fuse_file *ff; -- cgit v1.2.3-18-g5258 From 85f40aec887110ae6bbefa87988def4606a3d583 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:33 +0400 Subject: fuse: use req->page_descs[] for argpages cases Previously, anyone who set flag 'argpages' only filled req->pages[] and set per-request page_offset. This patch re-works all cases where argpages=1 to fill req->page_descs[] properly. Having req->page_descs[] filled properly allows to re-work fuse_copy_pages() to copy page fragments described by req->page_descs[]. This will be useful for next patches optimizing direct_IO. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 1 + fs/fuse/dev.c | 7 +++---- fs/fuse/dir.c | 1 + fs/fuse/file.c | 20 ++++++++++++++++++++ 4 files changed, 25 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 5cc838f320d..fb9dcfdf040 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -449,6 +449,7 @@ static int cuse_send_init(struct cuse_conn *cc) req->out.argvar = 1; req->out.argpages = 1; req->pages[0] = page; + req->page_descs[0].length = req->out.args[1].size; req->num_pages = 1; req->end = cuse_process_init_reply; fuse_request_send_background(fc, req); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index db4af8f3886..cbae09e5a49 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -903,11 +903,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, { unsigned i; struct fuse_req *req = cs->req; - unsigned offset = req->page_descs[0].offset; - unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { int err; + unsigned offset = req->page_descs[i].offset; + unsigned count = min(nbytes, req->page_descs[i].length); err = fuse_copy_page(cs, &req->pages[i], offset, count, zeroing); @@ -915,8 +915,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, return err; nbytes -= count; - count = min(nbytes, (unsigned) PAGE_SIZE); - offset = 0; } return 0; } @@ -1626,6 +1624,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = this_num; req->num_pages++; offset = 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d04bcc5cccd..ed8f8c55ce1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1317,6 +1317,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; + req->page_descs[0].length = PAGE_SIZE; if (fc->do_readdirplus) { attr_version = fuse_get_attr_version(fc); fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2b6f08ac62c..3384a200b3e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; + req->page_descs[0].length = count; num_read = fuse_send_read(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -674,6 +675,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) page_cache_get(page); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = PAGE_SIZE; req->num_pages++; data->nr_pages--; return 0; @@ -869,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, err = 0; req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = tmp; req->num_pages++; iov_iter_advance(ii, tmp); @@ -1044,6 +1047,15 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static inline void fuse_page_descs_length_init(struct fuse_req *req) +{ + int i; + + for (i = 0; i < req->num_pages; i++) + req->page_descs[i].length = PAGE_SIZE - + req->page_descs[i].offset; +} + static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, size_t *nbytesp, int write) { @@ -1071,6 +1083,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, req->num_pages = npages; req->page_descs[0].offset = offset; + fuse_page_descs_length_init(req); if (write) req->in.argpages = 1; @@ -1078,6 +1091,11 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, req->out.argpages = 1; nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; + + if (*nbytesp < nbytes) + req->page_descs[req->num_pages - 1].length -= + nbytes - *nbytesp; + *nbytesp = min(*nbytesp, nbytes); return 0; @@ -1315,6 +1333,7 @@ static int fuse_writepage_locked(struct page *page) req->num_pages = 1; req->pages[0] = tmp_page; req->page_descs[0].offset = 0; + req->page_descs[0].length = PAGE_SIZE; req->end = fuse_writepage_end; req->inode = inode; @@ -1902,6 +1921,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); req->num_pages = num_pages; + fuse_page_descs_length_init(req); /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; -- cgit v1.2.3-18-g5258 From b98d023a24496bf7d538c549e5426b1173c6f55d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:15 +0400 Subject: fuse: pass iov[] to fuse_get_user_pages() The patch makes preliminary work for the next patch optimizing scatter-gather direct IO. The idea is to allow fuse_get_user_pages() to pack as many iov-s to each fuse request as possible. So, here we only rework all related call-paths to carry iov[] from fuse_direct_IO() to fuse_get_user_pages(). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 103 +++++++++++++++++++++++++++------------------------------ 1 file changed, 49 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3384a200b3e..542ad97b103 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1056,14 +1056,18 @@ static inline void fuse_page_descs_length_init(struct fuse_req *req) req->page_descs[i].offset; } -static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, +static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t *nbytesp, int write) { size_t nbytes = *nbytesp; - unsigned long user_addr = (unsigned long) buf; - unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = min(iov_iter_single_seg_count(ii), nbytes); + unsigned long user_addr; + unsigned offset; int npages; + user_addr = (unsigned long)ii->iov->iov_base + ii->iov_offset; + offset = user_addr & ~PAGE_MASK; + /* Special case for kernel I/O: can copy directly into the buffer */ if (segment_eq(get_fs(), KERNEL_DS)) { if (write) @@ -1071,10 +1075,12 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, else req->out.args[0].value = (void *) user_addr; + iov_iter_advance(ii, frag_size); + *nbytesp = frag_size; return 0; } - nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); + nbytes = min_t(size_t, frag_size, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); npages = get_user_pages_fast(user_addr, npages, !write, req->pages); @@ -1092,17 +1098,19 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; - if (*nbytesp < nbytes) + if (frag_size < nbytes) req->page_descs[req->num_pages - 1].length -= - nbytes - *nbytesp; + nbytes - frag_size; - *nbytesp = min(*nbytesp, nbytes); + *nbytesp = min(frag_size, nbytes); + iov_iter_advance(ii, *nbytesp); return 0; } -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) +static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, + loff_t *ppos, int write) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1110,6 +1118,9 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, loff_t pos = *ppos; ssize_t res = 0; struct fuse_req *req; + struct iov_iter ii; + + iov_iter_init(&ii, iov, nr_segs, count, 0); req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) @@ -1119,7 +1130,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, buf, &nbytes, write); + int err = fuse_get_user_pages(req, &ii, &nbytes, write); if (err) { res = err; break; @@ -1142,7 +1153,6 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, count -= nres; res += nres; pos += nres; - buf += nres; if (nres != nbytes) break; if (count) { @@ -1159,10 +1169,17 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, return res; } + +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) +{ + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + return __fuse_direct_io(file, &iov, 1, count, ppos, write); +} EXPORT_SYMBOL_GPL(fuse_direct_io); -static ssize_t fuse_direct_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { ssize_t res; struct inode *inode = file->f_path.dentry->d_inode; @@ -1170,22 +1187,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(file, buf, count, ppos, 0); + res = __fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + ppos, 0); fuse_invalidate_attr(inode); return res; } -static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t fuse_direct_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + return __fuse_direct_read(file, &iov, 1, ppos); +} + +static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { struct inode *inode = file->f_path.dentry->d_inode; + size_t count = iov_length(iov, nr_segs); ssize_t res; res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = fuse_direct_io(file, buf, count, ppos, 1); + res = __fuse_direct_io(file, iov, nr_segs, count, ppos, 1); if (res > 0) fuse_write_update_size(inode, *ppos); } @@ -1198,6 +1224,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, static ssize_t fuse_direct_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; @@ -1206,7 +1233,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(file, buf, count, ppos); + res = __fuse_direct_write(file, &iov, 1, ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2167,41 +2194,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, return 0; } -static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, int rw) -{ - const struct iovec *vector = iov; - ssize_t ret = 0; - - while (nr_segs > 0) { - void __user *base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - if (rw == WRITE) - nr = __fuse_direct_write(filp, base, len, ppos); - else - nr = fuse_direct_read(filp, base, len, ppos); - - if (nr < 0) { - if (!ret) - ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } - - return ret; -} - - static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2213,7 +2205,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, file = iocb->ki_filp; pos = offset; - ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); + if (rw == WRITE) + ret = __fuse_direct_write(file, iov, nr_segs, &pos); + else + ret = __fuse_direct_read(file, iov, nr_segs, &pos); return ret; } -- cgit v1.2.3-18-g5258 From 7c190c8b9c0dd373cdd4d96e63306ec6e1a7115d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:29 +0400 Subject: fuse: optimize fuse_get_user_pages() Let fuse_get_user_pages() pack as many iov-s to a single fuse_req as possible. This is very beneficial in case of iov[] consisting of many iov-s of relatively small sizes (e.g. PAGE_SIZE). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 79 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 542ad97b103..b2aa6c21e20 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1047,29 +1047,37 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } -static inline void fuse_page_descs_length_init(struct fuse_req *req) +static inline void fuse_page_descs_length_init(struct fuse_req *req, + unsigned index, unsigned nr_pages) { int i; - for (i = 0; i < req->num_pages; i++) + for (i = index; i < index + nr_pages; i++) req->page_descs[i].length = PAGE_SIZE - req->page_descs[i].offset; } +static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) +{ + return (unsigned long)ii->iov->iov_base + ii->iov_offset; +} + +static inline size_t fuse_get_frag_size(const struct iov_iter *ii, + size_t max_size) +{ + return min(iov_iter_single_seg_count(ii), max_size); +} + static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t *nbytesp, int write) { - size_t nbytes = *nbytesp; - size_t frag_size = min(iov_iter_single_seg_count(ii), nbytes); - unsigned long user_addr; - unsigned offset; - int npages; - - user_addr = (unsigned long)ii->iov->iov_base + ii->iov_offset; - offset = user_addr & ~PAGE_MASK; + size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ if (segment_eq(get_fs(), KERNEL_DS)) { + unsigned long user_addr = fuse_get_user_addr(ii); + size_t frag_size = fuse_get_frag_size(ii, *nbytesp); + if (write) req->in.args[1].value = (void *) user_addr; else @@ -1080,30 +1088,45 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } - nbytes = min_t(size_t, frag_size, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); - npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); - npages = get_user_pages_fast(user_addr, npages, !write, req->pages); - if (npages < 0) - return npages; + while (nbytes < *nbytesp && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + unsigned npages; + unsigned long user_addr = fuse_get_user_addr(ii); + unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); + int ret; - req->num_pages = npages; - req->page_descs[0].offset = offset; - fuse_page_descs_length_init(req); + unsigned n = FUSE_MAX_PAGES_PER_REQ - req->num_pages; + frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); + + npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + npages = clamp(npages, 1U, n); + + ret = get_user_pages_fast(user_addr, npages, !write, + &req->pages[req->num_pages]); + if (ret < 0) + return ret; + + npages = ret; + frag_size = min_t(size_t, frag_size, + (npages << PAGE_SHIFT) - offset); + iov_iter_advance(ii, frag_size); + + req->page_descs[req->num_pages].offset = offset; + fuse_page_descs_length_init(req, req->num_pages, npages); + + req->num_pages += npages; + req->page_descs[req->num_pages - 1].length -= + (npages << PAGE_SHIFT) - offset - frag_size; + + nbytes += frag_size; + } if (write) req->in.argpages = 1; else req->out.argpages = 1; - nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; - - if (frag_size < nbytes) - req->page_descs[req->num_pages - 1].length -= - nbytes - frag_size; - - *nbytesp = min(frag_size, nbytes); - iov_iter_advance(ii, *nbytesp); + *nbytesp = nbytes; return 0; } @@ -1948,7 +1971,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); req->num_pages = num_pages; - fuse_page_descs_length_init(req); + fuse_page_descs_length_init(req, 0, req->num_pages); /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; -- cgit v1.2.3-18-g5258 From 5565a9d884327ac45d49041f1b846dac273e110c Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:36 +0400 Subject: fuse: optimize __fuse_direct_io() __fuse_direct_io() allocates fuse-requests by calling fuse_get_req(fc, n). The patch calculates 'n' based on iov[] array. This is useful because allocating FUSE_MAX_PAGES_PER_REQ page pointers and descriptors for each fuse request would be waste of memory in case of iov-s of smaller size. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b2aa6c21e20..68e10d43bd3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1088,14 +1088,14 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } - while (nbytes < *nbytesp && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; unsigned long user_addr = fuse_get_user_addr(ii); unsigned offset = user_addr & ~PAGE_MASK; size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); int ret; - unsigned n = FUSE_MAX_PAGES_PER_REQ - req->num_pages; + unsigned n = req->max_pages - req->num_pages; frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1131,6 +1131,23 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } +static inline int fuse_iter_npages(const struct iov_iter *ii_p) +{ + struct iov_iter ii = *ii_p; + int npages = 0; + + while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { + unsigned long user_addr = fuse_get_user_addr(&ii); + unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = iov_iter_single_seg_count(&ii); + + npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + iov_iter_advance(&ii, frag_size); + } + + return min(npages, FUSE_MAX_PAGES_PER_REQ); +} + static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write) @@ -1145,7 +1162,7 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, iov_iter_init(&ii, iov, nr_segs, count, 0); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) return PTR_ERR(req); @@ -1180,7 +1197,7 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) break; } -- cgit v1.2.3-18-g5258 From fb05f41f5f96f7423c53da4d87913fb44fd0565d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 10 Nov 2012 16:55:56 +0100 Subject: fuse: cleanup fuse_direct_io() Fix the following sparse warnings: fs/fuse/file.c:1216:43: warning: cast removes address space of expression fs/fuse/file.c:1216:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1216:43: expected void [noderef] *iov_base fs/fuse/file.c:1216:43: got void * fs/fuse/file.c:1241:43: warning: cast removes address space of expression fs/fuse/file.c:1241:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1241:43: expected void [noderef] *iov_base fs/fuse/file.c:1241:43: got void * fs/fuse/file.c:1267:43: warning: cast removes address space of expression fs/fuse/file.c:1267:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1267:43: expected void [noderef] *iov_base fs/fuse/file.c:1267:43: got void * Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 7 +++++-- fs/fuse/file.c | 23 ++++++++--------------- fs/fuse/fuse_i.h | 5 +++-- 3 files changed, 16 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index fb9dcfdf040..6f96a8def14 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -91,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { loff_t pos = 0; + struct iovec iov = { .iov_base = buf, .iov_len = count }; - return fuse_direct_io(file, buf, count, &pos, 0); + return fuse_direct_io(file, &iov, 1, count, &pos, 0); } static ssize_t cuse_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { loff_t pos = 0; + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(file, buf, count, &pos, 1); + return fuse_direct_io(file, &iov, 1, count, &pos, 1); } static int cuse_open(struct inode *inode, struct file *file) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 68e10d43bd3..28bc9c67219 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1148,9 +1148,9 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) return min(npages, FUSE_MAX_PAGES_PER_REQ); } -static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, - unsigned long nr_segs, size_t count, - loff_t *ppos, int write) +ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, loff_t *ppos, + int write) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1209,13 +1209,6 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, return res; } - -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) -{ - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; - return __fuse_direct_io(file, &iov, 1, count, ppos, write); -} EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, @@ -1227,8 +1220,8 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, if (is_bad_inode(inode)) return -EIO; - res = __fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), - ppos, 0); + res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + ppos, 0); fuse_invalidate_attr(inode); @@ -1238,7 +1231,7 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + struct iovec iov = { .iov_base = buf, .iov_len = count }; return __fuse_direct_read(file, &iov, 1, ppos); } @@ -1251,7 +1244,7 @@ static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = __fuse_direct_io(file, iov, nr_segs, count, ppos, 1); + res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1); if (res > 0) fuse_write_update_size(inode, *ppos); } @@ -1264,7 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 70cef60afe0..13befcd29c5 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -811,8 +811,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write); +ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, loff_t *ppos, + int write); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); long fuse_ioctl_common(struct file *file, unsigned int cmd, -- cgit v1.2.3-18-g5258 From c2132c1bc73d9a279cec148f74ea709c960b3d89 Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Mon, 14 Jan 2013 22:30:00 -0800 Subject: Do not use RCU for current process credentials Commit c69e8d9c0 added rcu lock to fuse/dir.c It was assuming that 'task' is some other process but in fact this parameter always equals to 'current'. Inline this parameter to make it more readable and remove RCU lock as it is not needed when access current process credentials. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 22 +++++++++------------- fs/fuse/file.c | 2 +- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 2 +- 4 files changed, 13 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ed8f8c55ce1..aa0b6ade0e6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -985,7 +985,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, /* * Calling into a user-controlled filesystem gives the filesystem - * daemon ptrace-like capabilities over the requester process. This + * daemon ptrace-like capabilities over the current process. This * means, that the filesystem daemon is able to record the exact * filesystem operations performed, and can also control the behavior * of the requester process in otherwise impossible ways. For example @@ -996,27 +996,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, * for which the owner of the mount has ptrace privilege. This * excludes processes started by other users, suid or sgid processes. */ -int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) +int fuse_allow_current_process(struct fuse_conn *fc) { const struct cred *cred; - int ret; if (fc->flags & FUSE_ALLOW_OTHER) return 1; - rcu_read_lock(); - ret = 0; - cred = __task_cred(task); + cred = current_cred(); if (uid_eq(cred->euid, fc->user_id) && uid_eq(cred->suid, fc->user_id) && uid_eq(cred->uid, fc->user_id) && gid_eq(cred->egid, fc->group_id) && gid_eq(cred->sgid, fc->group_id) && gid_eq(cred->gid, fc->group_id)) - ret = 1; - rcu_read_unlock(); + return 1; - return ret; + return 0; } static int fuse_access(struct inode *inode, int mask) @@ -1077,7 +1073,7 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; /* @@ -1544,7 +1540,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, loff_t oldsize; int err; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) @@ -1653,7 +1649,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; return fuse_update_attributes(inode, stat, NULL, NULL); @@ -1756,7 +1752,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (fc->no_listxattr) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 28bc9c67219..a010585b0a7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2082,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (is_bad_inode(inode)) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 13befcd29c5..af51c146a9a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -774,9 +774,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc); int fuse_valid_type(int m); /** - * Is task allowed to perform filesystem operation? + * Is current process allowed to perform filesystem operation? */ -int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); +int fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9d95a5a3d55..79b70deb7cd 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -408,7 +408,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) struct fuse_statfs_out outarg; int err; - if (!fuse_allow_task(fc, current)) { + if (!fuse_allow_current_process(fc)) { buf->f_type = FUSE_SUPER_MAGIC; return 0; } -- cgit v1.2.3-18-g5258 From 4582a4ab2a0e7218449fb2e895d0aae9ea753c94 Mon Sep 17 00:00:00 2001 From: Feng Shuo Date: Tue, 15 Jan 2013 11:23:28 +0800 Subject: FUSE: Adapt readdirplus to application usage patterns Use the same adaptive readdirplus mechanism as NFS: http://permalink.gmane.org/gmane.linux.nfs/49299 If the user space implementation wants to disable readdirplus temporarily, it could just return ENOTSUPP. Then kernel will recall it with readdir. Signed-off-by: Feng Shuo Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 31 ++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 9 +++++++++ fs/fuse/inode.c | 1 + 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index aa0b6ade0e6..dc5e6489337 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -14,6 +14,27 @@ #include #include +static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_inode *fi = get_fuse_inode(dir); + + if (!fc->do_readdirplus) + return false; + if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) + return true; + if (filp->f_pos == 0) + return true; + return false; +} + +static void fuse_advise_use_readdirplus(struct inode *dir) +{ + struct fuse_inode *fi = get_fuse_inode(dir); + + set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); +} + #if BITS_PER_LONG >= 64 static inline void fuse_dentry_settime(struct dentry *entry, u64 time) { @@ -219,6 +240,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version); fuse_change_entry_timeout(entry, &outarg); } + fuse_advise_use_readdirplus(inode); return 1; } @@ -355,6 +377,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, else fuse_invalidate_entry_cache(entry); + fuse_advise_use_readdirplus(dir); return newent; out_iput: @@ -1290,7 +1313,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { - int err; + int plus, err; size_t nbytes; struct page *page; struct inode *inode = file->f_path.dentry->d_inode; @@ -1310,11 +1333,13 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) fuse_put_request(fc, req); return -ENOMEM; } + + plus = fuse_use_readdirplus(inode, file); req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; req->page_descs[0].length = PAGE_SIZE; - if (fc->do_readdirplus) { + if (plus) { attr_version = fuse_get_attr_version(fc); fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIRPLUS); @@ -1327,7 +1352,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - if (fc->do_readdirplus) { + if (plus) { err = parse_dirplusfile(page_address(page), nbytes, file, dstbuf, filldir, attr_version); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index af51c146a9a..fc55dd33c1e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -106,6 +106,15 @@ struct fuse_inode { /** List of writepage requestst (pending or sent) */ struct list_head writepages; + + /** Miscellaneous bits describing inode state */ + unsigned long state; +}; + +/** FUSE inode state bits */ +enum { + /** Advise readdirplus */ + FUSE_I_ADVISE_RDPLUS, }; struct fuse_conn; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79b70deb7cd..9876a87255f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->attr_version = 0; fi->writectr = 0; fi->orig_ino = 0; + fi->state = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); -- cgit v1.2.3-18-g5258 From 6a4e922c3db06f7da27e072729e047185c2fba66 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 4 Feb 2013 13:04:44 +0000 Subject: fuse: avoid out-of-scope stack access The all pointers within fuse_req must point to valid memory once fuse_force_forget() returns. This bug appeared in "fuse: implement NFS-like readdirplus support" and was never in any official Linux release. I tested the fuse_force_forget() code path by injecting to fake -ENOMEM and verified the FORGET operation was called properly in userspace. Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cbae09e5a49..e9bdec0b16d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -440,9 +440,8 @@ __acquires(fc->lock) } } -void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) +static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - req->isreply = 1; spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; @@ -459,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } spin_unlock(&fc->lock); } + +void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + __fuse_request_send(fc, req); +} EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, @@ -541,7 +546,9 @@ void fuse_force_forget(struct file *file, u64 nodeid) req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->isreply = 0; - fuse_request_send_nowait(fc, req); + __fuse_request_send(fc, req); + /* ignore errors */ + fuse_put_request(fc, req); } /* -- cgit v1.2.3-18-g5258 From dfca7cebc2679f3d129f8e680a8f199a7ad16e38 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 4 Feb 2013 15:57:42 +0100 Subject: fuse: don't WARN when nlink is zero drop_nlink() warns if nlink is already zero. This is triggerable by a buggy userspace filesystem. The cure, I think, is worse than the disease so disable the warning. Reported-by: Tero Roponen Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dc5e6489337..2b112d978e9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -705,7 +705,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - drop_nlink(inode); + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (inode->i_nlink > 0) + drop_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); -- cgit v1.2.3-18-g5258 From 0415d291022543d83ee799e9ffee08d856bca6e8 Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Mon, 4 Feb 2013 16:14:32 +0100 Subject: fuse: send poll events commit 626cf23660 "poll: add poll_requested_events()..." enabled us to send the requested events to the filesystem. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a010585b0a7..c8071768b95 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2167,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) return DEFAULT_POLLMASK; poll_wait(file, &ff->poll_wait, wait); + inarg.events = (__u32)poll_requested_events(wait); /* * Ask for notification iff there's someone waiting for it. -- cgit v1.2.3-18-g5258 From 634734b63ac39e137a1c623ba74f3e062b6577db Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 6 Feb 2013 22:29:01 +0000 Subject: fuse: allow control of adaptive readdirplus use For some filesystems (e.g. GlusterFS), the cost of performing a normal readdir and readdirplus are identical. Since adaptively using readdirplus has no benefit for those systems, give users/filesystems the option to control adaptive readdirplus use. v2 of this patch incorporates Miklos's suggestion to simplify the code, as well as improving consistency of macro names and documentation. Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 ++ fs/fuse/fuse_i.h | 5 ++++- fs/fuse/inode.c | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b112d978e9..85065221a58 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -21,6 +21,8 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) if (!fc->do_readdirplus) return false; + if (!fc->readdirplus_auto) + return true; if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) return true; if (filp->f_pos == 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc55dd33c1e..6aeba864f07 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -514,9 +514,12 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Does the filesystem support readdir-plus? */ + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; + /** Does the filesystem want adaptive readdirplus? */ + unsigned readdirplus_auto:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9876a87255f..01353ed7575 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -866,6 +866,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->auto_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) fc->do_readdirplus = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -893,7 +895,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); -- cgit v1.2.3-18-g5258