diff options
Diffstat (limited to 'fs/fuse/dir.c')
| -rw-r--r-- | fs/fuse/dir.c | 1675 |
1 files changed, 1273 insertions, 402 deletions
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 417bcee466f..0c6048247a3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -10,9 +10,59 @@ #include <linux/pagemap.h> #include <linux/file.h> -#include <linux/gfp.h> #include <linux/sched.h> #include <linux/namei.h> +#include <linux/slab.h> + +static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_inode *fi = get_fuse_inode(dir); + + if (!fc->do_readdirplus) + return false; + if (!fc->readdirplus_auto) + return true; + if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) + return true; + if (ctx->pos == 0) + return true; + return false; +} + +static void fuse_advise_use_readdirplus(struct inode *dir) +{ + struct fuse_inode *fi = get_fuse_inode(dir); + + set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); +} + +#if BITS_PER_LONG >= 64 +static inline void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; +} + +static inline u64 fuse_dentry_time(struct dentry *entry) +{ + return entry->d_time; +} +#else +/* + * On 32 bit archs store the high 32 bits of time in d_fsdata + */ +static void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; + entry->d_fsdata = (void *) (unsigned long) (time >> 32); +} + +static u64 fuse_dentry_time(struct dentry *entry) +{ + return (u64) entry->d_time + + ((u64) (unsigned long) entry->d_fsdata << 32); +} +#endif /* * FUSE caches dentries and attributes with separate timeout. The @@ -23,23 +73,34 @@ /* * Calculate the time in jiffies until a dentry/attributes are valid */ -static inline unsigned long time_to_jiffies(unsigned long sec, - unsigned long nsec) +static u64 time_to_jiffies(unsigned long sec, unsigned long nsec) { - struct timespec ts = {sec, nsec}; - return jiffies + timespec_to_jiffies(&ts); + if (sec || nsec) { + struct timespec ts = {sec, nsec}; + return get_jiffies_64() + timespec_to_jiffies(&ts); + } else + return 0; } /* * Set dentry and possibly attribute timeouts from the lookup/mk* * replies */ -static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) +static void fuse_change_entry_timeout(struct dentry *entry, + struct fuse_entry_out *o) { - entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); - if (entry->d_inode) - get_fuse_inode(entry->d_inode)->i_time = - time_to_jiffies(o->attr_valid, o->attr_valid_nsec); + fuse_dentry_settime(entry, + time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); +} + +static u64 attr_timeout(struct fuse_attr_out *o) +{ + return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +static u64 entry_attr_timeout(struct fuse_entry_out *o) +{ + return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); } /* @@ -48,7 +109,17 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) */ void fuse_invalidate_attr(struct inode *inode) { - get_fuse_inode(inode)->i_time = jiffies - 1; + get_fuse_inode(inode)->i_time = 0; +} + +/** + * Mark the attributes as stale due to an atime change. Avoid the invalidate if + * atime is not used. + */ +void fuse_invalidate_atime(struct inode *inode) +{ + if (!IS_RDONLY(inode)) + fuse_invalidate_attr(inode); } /* @@ -59,9 +130,9 @@ void fuse_invalidate_attr(struct inode *inode) * timeout is unknown (unlink, rmdir, rename and in some cases * lookup) */ -static void fuse_invalidate_entry_cache(struct dentry *entry) +void fuse_invalidate_entry_cache(struct dentry *entry) { - entry->d_time = jiffies - 1; + fuse_dentry_settime(entry, 0); } /* @@ -74,21 +145,39 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, - struct dentry *entry, +static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, + u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg) { + memset(outarg, 0, sizeof(struct fuse_entry_out)); req->in.h.opcode = FUSE_LOOKUP; - req->in.h.nodeid = get_node_id(dir); - req->inode = dir; + req->in.h.nodeid = nodeid; req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; + req->in.args[0].size = name->len + 1; + req->in.args[0].value = name->name; req->out.numargs = 1; - req->out.args[0].size = sizeof(struct fuse_entry_out); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(struct fuse_entry_out); req->out.args[0].value = outarg; } +u64 fuse_get_attr_version(struct fuse_conn *fc) +{ + u64 curr_version; + + /* + * The spin lock isn't actually needed on 64bit archs, but we + * don't yet care too much about such optimizations. + */ + spin_lock(&fc->lock); + curr_version = fc->attr_version; + spin_unlock(&fc->lock); + + return curr_version; +} + /* * Check whether the dentry is still valid * @@ -98,125 +187,212 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. */ -static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) +static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) { - struct inode *inode = entry->d_inode; + struct inode *inode; + struct dentry *parent; + struct fuse_conn *fc; + struct fuse_inode *fi; + int ret; + inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) - return 0; - else if (time_after(jiffies, entry->d_time)) { + goto invalid; + else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || + (flags & LOOKUP_REVAL)) { int err; struct fuse_entry_out outarg; - struct fuse_conn *fc; struct fuse_req *req; + struct fuse_forget_link *forget; + u64 attr_version; - /* Doesn't hurt to "reset" the validity timeout */ - fuse_invalidate_entry_cache(entry); + /* For negative dentries, always do a fresh lookup */ if (!inode) - return 0; + goto invalid; + + ret = -ECHILD; + if (flags & LOOKUP_RCU) + goto out; fc = get_fuse_conn(inode); - req = fuse_get_request(fc); - if (!req) - return 0; + req = fuse_get_req_nopages(fc); + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + forget = fuse_alloc_forget(); + if (!forget) { + fuse_put_request(fc, req); + ret = -ENOMEM; + goto out; + } + + attr_version = fuse_get_attr_version(fc); - fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); - request_send(fc, req); + parent = dget_parent(entry); + fuse_lookup_init(fc, req, get_node_id(parent->d_inode), + &entry->d_name, &outarg); + fuse_request_send(fc, req); + dput(parent); err = req->out.h.error; + fuse_put_request(fc, req); + /* Zero nodeid is same as -ENOENT */ + if (!err && !outarg.nodeid) + err = -ENOENT; if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); + fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { - fuse_send_forget(fc, req, outarg.nodeid, 1); - return 0; + fuse_queue_forget(fc, forget, outarg.nodeid, 1); + goto invalid; } - fi->nlookup ++; + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); } - fuse_put_request(fc, req); + kfree(forget); if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) - return 0; - - fuse_change_attributes(inode, &outarg.attr); - fuse_change_timeout(entry, &outarg); - } - return 1; -} - -/* - * Check if there's already a hashed alias of this directory inode. - * If yes, then lookup and mkdir must not create a new alias. - */ -static int dir_alias(struct inode *inode) -{ - if (S_ISDIR(inode->i_mode)) { - struct dentry *alias = d_find_alias(inode); - if (alias) { - dput(alias); - return 1; + goto invalid; + + fuse_change_attributes(inode, &outarg.attr, + entry_attr_timeout(&outarg), + attr_version); + fuse_change_entry_timeout(entry, &outarg); + } else if (inode) { + fi = get_fuse_inode(inode); + if (flags & LOOKUP_RCU) { + if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) + return -ECHILD; + } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { + parent = dget_parent(entry); + fuse_advise_use_readdirplus(parent->d_inode); + dput(parent); } } - return 0; + ret = 1; +out: + return ret; + +invalid: + ret = 0; + + if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) + ret = 1; + goto out; } -static inline int invalid_nodeid(u64 nodeid) +static int invalid_nodeid(u64 nodeid) { return !nodeid || nodeid == FUSE_ROOT_ID; } -static struct dentry_operations fuse_dentry_operations = { +const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; -static inline int valid_mode(int m) +int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); } -static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode) { - int err; - struct fuse_entry_out outarg; - struct inode *inode = NULL; - struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; + struct fuse_forget_link *forget; + u64 attr_version; + int err; - if (entry->d_name.len > FUSE_NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); + *inode = NULL; + err = -ENAMETOOLONG; + if (name->len > FUSE_NAME_MAX) + goto out; - req = fuse_get_request(fc); - if (!req) - return ERR_PTR(-EINTR); + req = fuse_get_req_nopages(fc); + err = PTR_ERR(req); + if (IS_ERR(req)) + goto out; - fuse_lookup_init(req, dir, entry, &outarg); - request_send(fc, req); - err = req->out.h.error; - if (!err && ((outarg.nodeid && invalid_nodeid(outarg.nodeid)) || - !valid_mode(outarg.attr.mode))) - err = -EIO; - if (!err && outarg.nodeid) { - inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr); - if (!inode) { - fuse_send_forget(fc, req, outarg.nodeid, 1); - return ERR_PTR(-ENOMEM); - } + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) { + fuse_put_request(fc, req); + goto out; } + + attr_version = fuse_get_attr_version(fc); + + fuse_lookup_init(fc, req, nodeid, name, outarg); + fuse_request_send(fc, req); + err = req->out.h.error; fuse_put_request(fc, req); - if (err && err != -ENOENT) - return ERR_PTR(err); + /* Zero nodeid is same as -ENOENT, but with valid timeout */ + if (err || !outarg->nodeid) + goto out_put_forget; - if (inode && dir_alias(inode)) { - iput(inode); - return ERR_PTR(-EIO); + err = -EIO; + if (!outarg->nodeid) + goto out_put_forget; + if (!fuse_valid_type(outarg->attr.mode)) + goto out_put_forget; + + *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, + &outarg->attr, entry_attr_timeout(outarg), + attr_version); + err = -ENOMEM; + if (!*inode) { + fuse_queue_forget(fc, forget, outarg->nodeid, 1); + goto out; } - d_add(entry, inode); - entry->d_op = &fuse_dentry_operations; - if (!err) - fuse_change_timeout(entry, &outarg); + err = 0; + + out_put_forget: + kfree(forget); + out: + return err; +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + unsigned int flags) +{ + int err; + struct fuse_entry_out outarg; + struct inode *inode; + struct dentry *newent; + bool outarg_valid = true; + + err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, + &outarg, &inode); + if (err == -ENOENT) { + outarg_valid = false; + err = 0; + } + if (err) + goto out_err; + + err = -EIO; + if (inode && get_node_id(inode) == FUSE_ROOT_ID) + goto out_iput; + + newent = d_materialise_unique(entry, inode); + err = PTR_ERR(newent); + if (IS_ERR(newent)) + goto out_err; + + entry = newent ? newent : entry; + if (outarg_valid) + fuse_change_entry_timeout(entry, &outarg); else fuse_invalidate_entry_cache(entry); - return NULL; + + fuse_advise_use_readdirplus(dir); + return newent; + + out_iput: + iput(inode); + out_err: + return ERR_PTR(err); } /* @@ -225,189 +401,258 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ -static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, - struct nameidata *nd) +static int fuse_create_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode, int *opened) { int err; struct inode *inode; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; - struct fuse_open_in inarg; + struct fuse_forget_link *forget; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; - struct file *file; - int flags = nd->intent.open.flags - 1; - err = -ENOSYS; - if (fc->no_create) - goto out; + /* Userspace expects S_IFREG in create mode */ + BUG_ON((mode & S_IFMT) != S_IFREG); - err = -EINTR; - req = fuse_get_request(fc); - if (!req) - goto out; + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) + goto out_err; + + req = fuse_get_req_nopages(fc); + err = PTR_ERR(req); + if (IS_ERR(req)) + goto out_put_forget_req; - ff = fuse_file_alloc(); + err = -ENOMEM; + ff = fuse_file_alloc(fc); if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); + memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; req->out.numargs = 2; - req->out.args[0].size = sizeof(outentry); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(outentry); req->out.args[0].value = &outentry; req->out.args[1].size = sizeof(outopen); req->out.args[1].value = &outopen; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; - if (err) { - if (err == -ENOSYS) - fc->no_create = 1; + if (err) goto out_free_ff; - } err = -EIO; if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) goto out_free_ff; + fuse_put_request(fc, req); + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, - &outentry.attr); - err = -ENOMEM; + &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - ff->fh = outopen.fh; - /* Special release, with inode = NULL, this will - trigger a 'forget' request when the release is - complete */ - fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0); - goto out_put_request; + fuse_sync_release(ff, flags); + fuse_queue_forget(fc, forget, outentry.nodeid, 1); + err = -ENOMEM; + goto out_err; } - fuse_put_request(fc, req); + kfree(forget); d_instantiate(entry, inode); - fuse_change_timeout(entry, &outentry); - file = lookup_instantiate_filp(nd, entry, generic_file_open); - if (IS_ERR(file)) { - ff->fh = outopen.fh; - fuse_send_release(fc, ff, outentry.nodeid, inode, flags, 0); - return PTR_ERR(file); + fuse_change_entry_timeout(entry, &outentry); + fuse_invalidate_attr(dir); + err = finish_open(file, entry, generic_file_open, opened); + if (err) { + fuse_sync_release(ff, flags); + } else { + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); } - fuse_finish_open(inode, file, ff, &outopen); - return 0; + return err; - out_free_ff: +out_free_ff: fuse_file_free(ff); - out_put_request: +out_put_request: fuse_put_request(fc, req); - out: +out_put_forget_req: + kfree(forget); +out_err: return err; } +static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode, int *opened) +{ + int err; + struct fuse_conn *fc = get_fuse_conn(dir); + struct dentry *res = NULL; + + if (d_unhashed(entry)) { + res = fuse_lookup(dir, entry, 0); + if (IS_ERR(res)) + return PTR_ERR(res); + + if (res) + entry = res; + } + + if (!(flags & O_CREAT) || entry->d_inode) + goto no_open; + + /* Only creates */ + *opened |= FILE_CREATED; + + if (fc->no_create) + goto mknod; + + err = fuse_create_open(dir, entry, file, flags, mode, opened); + if (err == -ENOSYS) { + fc->no_create = 1; + goto mknod; + } +out_dput: + dput(res); + return err; + +mknod: + err = fuse_mknod(dir, entry, mode, 0); + if (err) + goto out_dput; +no_open: + return finish_no_open(file, res); +} + /* * Code shared between mknod, mkdir, symlink and link */ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct inode *dir, struct dentry *entry, - int mode) + umode_t mode) { struct fuse_entry_out outarg; struct inode *inode; int err; + struct fuse_forget_link *forget; + forget = fuse_alloc_forget(); + if (!forget) { + fuse_put_request(fc, req); + return -ENOMEM; + } + + memset(&outarg, 0, sizeof(outarg)); req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; - if (err) { - fuse_put_request(fc, req); - return err; - } + fuse_put_request(fc, req); + if (err) + goto out_put_forget_req; + err = -EIO; if (invalid_nodeid(outarg.nodeid)) - goto out_put_request; + goto out_put_forget_req; if ((outarg.attr.mode ^ mode) & S_IFMT) - goto out_put_request; + goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr); + &outarg.attr, entry_attr_timeout(&outarg), 0); if (!inode) { - fuse_send_forget(fc, req, outarg.nodeid, 1); + fuse_queue_forget(fc, forget, outarg.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, req); + kfree(forget); - if (dir_alias(inode)) { - iput(inode); - return -EIO; - } + err = d_instantiate_no_diralias(entry, inode); + if (err) + return err; - d_instantiate(entry, inode); - fuse_change_timeout(entry, &outarg); + fuse_change_entry_timeout(entry, &outarg); fuse_invalidate_attr(dir); return 0; - out_put_request: - fuse_put_request(fc, req); + out_put_forget_req: + kfree(forget); return err; } -static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, +static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) { struct fuse_mknod_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + if (!fc->dont_mask) + mode &= ~current_umask(); memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; return create_new_entry(fc, req, dir, entry, mode); } -static int fuse_create(struct inode *dir, struct dentry *entry, int mode, - struct nameidata *nd) +static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, + bool excl) { - if (nd && (nd->flags & LOOKUP_CREATE)) { - int err = fuse_create_open(dir, entry, mode, nd); - if (err != -ENOSYS) - return err; - /* Fall back on mknod */ - } return fuse_mknod(dir, entry, mode, 0); } -static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) +static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + if (!fc->dont_mask) + mode &= ~current_umask(); memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); @@ -422,9 +667,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); req->in.h.opcode = FUSE_SYMLINK; req->in.numargs = 2; @@ -435,33 +680,49 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, return create_new_entry(fc, req, dir, entry, S_IFLNK); } +static inline void fuse_update_ctime(struct inode *inode) +{ + if (!IS_NOCMTIME(inode)) { + inode->i_ctime = current_fs_time(inode->i_sb); + mark_inode_dirty_sync(inode); + } +} + static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); req->in.h.opcode = FUSE_UNLINK; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { struct inode *inode = entry->d_inode; - - /* Set nlink to zero so the inode can be cleared, if - the inode does have more links this will be - discovered at the next lookup/getattr */ - inode->i_nlink = 0; + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); + fuse_update_ctime(inode); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -471,21 +732,20 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - entry->d_inode->i_nlink = 0; + clear_nlink(entry->d_inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) @@ -493,40 +753,54 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) return err; } -static int fuse_rename(struct inode *olddir, struct dentry *oldent, - struct inode *newdir, struct dentry *newent) +static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags, int opcode, size_t argsize) { int err; - struct fuse_rename_in inarg; + struct fuse_rename2_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req; - memset(&inarg, 0, sizeof(inarg)); + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); - req->in.h.opcode = FUSE_RENAME; + inarg.flags = flags; + req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(olddir); - req->inode = olddir; - req->inode2 = newdir; req->in.numargs = 3; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = argsize; req->in.args[0].value = &inarg; req->in.args[1].size = oldent->d_name.len + 1; req->in.args[1].value = oldent->d_name.name; req->in.args[2].size = newent->d_name.len + 1; req->in.args[2].value = newent->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { + /* ctime changes */ + fuse_invalidate_attr(oldent->d_inode); + fuse_update_ctime(oldent->d_inode); + + if (flags & RENAME_EXCHANGE) { + fuse_invalidate_attr(newent->d_inode); + fuse_update_ctime(newent->d_inode); + } + fuse_invalidate_attr(olddir); if (olddir != newdir) fuse_invalidate_attr(newdir); /* newent will end up negative */ - if (newent->d_inode) + if (!(flags & RENAME_EXCHANGE) && newent->d_inode) { + fuse_invalidate_attr(newent->d_inode); fuse_invalidate_entry_cache(newent); + fuse_update_ctime(newent->d_inode); + } } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation @@ -541,6 +815,42 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, return err; } +static int fuse_rename2(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + struct fuse_conn *fc = get_fuse_conn(olddir); + int err; + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (flags) { + if (fc->no_rename2 || fc->minor < 23) + return -EINVAL; + + err = fuse_rename_common(olddir, oldent, newdir, newent, flags, + FUSE_RENAME2, + sizeof(struct fuse_rename2_in)); + if (err == -ENOSYS) { + fc->no_rename2 = 1; + err = -EINVAL; + } + } else { + err = fuse_rename_common(olddir, oldent, newdir, newent, 0, + FUSE_RENAME, + sizeof(struct fuse_rename_in)); + } + + return err; +} + +static int fuse_rename(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + return fuse_rename2(olddir, oldent, newdir, newent, 0); +} + static int fuse_link(struct dentry *entry, struct inode *newdir, struct dentry *newent) { @@ -548,14 +858,13 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_link_in inarg; struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); req->in.h.opcode = FUSE_LINK; - req->inode2 = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -568,46 +877,208 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, will reflect changes in the backing inode (link count, etc.) */ - if (!err || err == -EINTR) + if (!err) { + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + inc_nlink(inode); + spin_unlock(&fc->lock); fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } else if (err == -EINTR) { + fuse_invalidate_attr(inode); + } return err; } -int fuse_do_getattr(struct inode *inode) +static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + struct kstat *stat) +{ + unsigned int blkbits; + struct fuse_conn *fc = get_fuse_conn(inode); + + /* see the comment in fuse_change_attributes() */ + if (fc->writeback_cache && S_ISREG(inode->i_mode)) { + attr->size = i_size_read(inode); + attr->mtime = inode->i_mtime.tv_sec; + attr->mtimensec = inode->i_mtime.tv_nsec; + attr->ctime = inode->i_ctime.tv_sec; + attr->ctimensec = inode->i_ctime.tv_nsec; + } + + stat->dev = inode->i_sb->s_dev; + stat->ino = attr->ino; + stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); + stat->nlink = attr->nlink; + stat->uid = make_kuid(&init_user_ns, attr->uid); + stat->gid = make_kgid(&init_user_ns, attr->gid); + stat->rdev = inode->i_rdev; + stat->atime.tv_sec = attr->atime; + stat->atime.tv_nsec = attr->atimensec; + stat->mtime.tv_sec = attr->mtime; + stat->mtime.tv_nsec = attr->mtimensec; + stat->ctime.tv_sec = attr->ctime; + stat->ctime.tv_nsec = attr->ctimensec; + stat->size = attr->size; + stat->blocks = attr->blocks; + + if (attr->blksize != 0) + blkbits = ilog2(attr->blksize); + else + blkbits = inode->i_sb->s_blocksize_bits; + + stat->blksize = 1 << blkbits; +} + +static int fuse_do_getattr(struct inode *inode, struct kstat *stat, + struct file *file) { int err; - struct fuse_attr_out arg; + struct fuse_getattr_in inarg; + struct fuse_attr_out outarg; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); - if (!req) - return -EINTR; + struct fuse_req *req; + u64 attr_version; + + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + attr_version = fuse_get_attr_version(fc); + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); + /* Directories have separate file-handle space */ + if (file && S_ISREG(inode->i_mode)) { + struct fuse_file *ff = file->private_data; + + inarg.getattr_flags |= FUSE_GETATTR_FH; + inarg.fh = ff->fh; + } req->in.h.opcode = FUSE_GETATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; req->out.numargs = 1; - req->out.args[0].size = sizeof(arg); - req->out.args[0].value = &arg; - request_send(fc, req); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + else + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) { + if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; } else { - struct fuse_inode *fi = get_fuse_inode(inode); - fuse_change_attributes(inode, &arg.attr); - fi->i_time = time_to_jiffies(arg.attr_valid, - arg.attr_valid_nsec); + fuse_change_attributes(inode, &outarg.attr, + attr_timeout(&outarg), + attr_version); + if (stat) + fuse_fillattr(inode, &outarg.attr, stat); } } return err; } +int fuse_update_attributes(struct inode *inode, struct kstat *stat, + struct file *file, bool *refreshed) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + int err; + bool r; + + if (time_before64(fi->i_time, get_jiffies_64())) { + r = true; + err = fuse_do_getattr(inode, stat, file); + } else { + r = false; + err = 0; + if (stat) { + generic_fillattr(inode, stat); + stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; + } + } + + if (refreshed != NULL) + *refreshed = r; + + return err; +} + +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + u64 child_nodeid, struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + + if (child_nodeid != 0 && entry->d_inode) { + mutex_lock(&entry->d_inode->i_mutex); + if (get_node_id(entry->d_inode) != child_nodeid) { + err = -ENOENT; + goto badentry; + } + if (d_mountpoint(entry)) { + err = -EBUSY; + goto badentry; + } + if (S_ISDIR(entry->d_inode->i_mode)) { + shrink_dcache_parent(entry); + if (!simple_empty(entry)) { + err = -ENOTEMPTY; + goto badentry; + } + entry->d_inode->i_flags |= S_DEAD; + } + dont_mount(entry); + clear_nlink(entry->d_inode); + err = 0; + badentry: + mutex_unlock(&entry->d_inode->i_mutex); + if (!err) + d_delete(entry); + } else { + err = 0; + } + dput(entry); + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem - * daemon ptrace-like capabilities over the requester process. This + * daemon ptrace-like capabilities over the current process. This * means, that the filesystem daemon is able to record the exact * filesystem operations performed, and can also control the behavior * of the requester process in otherwise impossible ways. For example @@ -618,46 +1089,25 @@ int fuse_do_getattr(struct inode *inode) * for which the owner of the mount has ptrace privilege. This * excludes processes started by other users, suid or sgid processes. */ -static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) +int fuse_allow_current_process(struct fuse_conn *fc) { + const struct cred *cred; + if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->euid == fc->user_id && - task->suid == fc->user_id && - task->uid == fc->user_id && - task->egid == fc->group_id && - task->sgid == fc->group_id && - task->gid == fc->group_id) + cred = current_cred(); + if (uid_eq(cred->euid, fc->user_id) && + uid_eq(cred->suid, fc->user_id) && + uid_eq(cred->uid, fc->user_id) && + gid_eq(cred->egid, fc->group_id) && + gid_eq(cred->sgid, fc->group_id) && + gid_eq(cred->gid, fc->group_id)) return 1; return 0; } -/* - * Check whether the inode attributes are still valid - * - * If the attribute validity timeout has expired, then fetch the fresh - * attributes with a 'getattr' request - * - * I'm not sure why cached attributes are never returned for the root - * inode, this is probably being too cautious. - */ -static int fuse_revalidate(struct dentry *entry) -{ - struct inode *inode = entry->d_inode; - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_conn *fc = get_fuse_conn(inode); - - if (!fuse_allow_task(fc, current)) - return -EACCES; - if (get_node_id(inode) != FUSE_ROOT_ID && - time_before_eq(jiffies, fi->i_time)) - return 0; - - return fuse_do_getattr(inode); -} - static int fuse_access(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -665,22 +1115,23 @@ static int fuse_access(struct inode *inode, int mask) struct fuse_access_in inarg; int err; + BUG_ON(mask & MAY_NOT_BLOCK); + if (fc->no_access) return 0; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); - inarg.mask = mask; + inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { @@ -690,6 +1141,14 @@ static int fuse_access(struct inode *inode, int mask) return err; } +static int fuse_perm_getattr(struct inode *inode, int mask) +{ + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + + return fuse_do_getattr(inode, NULL, NULL); +} + /* * Check permission. The two basic access models of FUSE are: * @@ -703,105 +1162,293 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) +static int fuse_permission(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); + bool refreshed = false; + int err = 0; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; - else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { - int err = generic_permission(inode, mask, NULL); + + /* + * If attributes are needed, refresh them before proceeding + */ + if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || + ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { + struct fuse_inode *fi = get_fuse_inode(inode); + + if (time_before64(fi->i_time, get_jiffies_64())) { + refreshed = true; + + err = fuse_perm_getattr(inode, mask); + if (err) + return err; + } + } + + if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { + err = generic_permission(inode, mask); /* If permission is denied, try to refresh file attributes. This is also needed, because the root node will at first have no permissions */ - if (err == -EACCES) { - err = fuse_do_getattr(inode); + if (err == -EACCES && !refreshed) { + err = fuse_perm_getattr(inode, mask); if (!err) - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask); } /* Note: the opposite of the above test does not exist. So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - - return err; - } else { - int mode = inode->i_mode; - if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) - return -EACCES; - - if (nd && (nd->flags & LOOKUP_ACCESS)) - return fuse_access(inode, mask); - return 0; + } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { + err = fuse_access(inode, mask); + } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { + if (!(inode->i_mode & S_IXUGO)) { + if (refreshed) + return -EACCES; + + err = fuse_perm_getattr(inode, mask); + if (!err && !(inode->i_mode & S_IXUGO)) + return -EACCES; + } } + return err; } static int parse_dirfile(char *buf, size_t nbytes, struct file *file, - void *dstbuf, filldir_t filldir) + struct dir_context *ctx) { while (nbytes >= FUSE_NAME_OFFSET) { struct fuse_dirent *dirent = (struct fuse_dirent *) buf; size_t reclen = FUSE_DIRENT_SIZE(dirent); - int over; if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) return -EIO; if (reclen > nbytes) break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; - over = filldir(dstbuf, dirent->name, dirent->namelen, - file->f_pos, dirent->ino, dirent->type); - if (over) + if (!dir_emit(ctx, dirent->name, dirent->namelen, + dirent->ino, dirent->type)) break; buf += reclen; nbytes -= reclen; - file->f_pos = dirent->off; + ctx->pos = dirent->off; } return 0; } -static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, - size_t count) +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) { - return fuse_send_read_common(req, file, inode, pos, count, 1); + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' && name.len == 2) + return 0; + } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry) { + inode = dentry->d_inode; + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + err = d_invalidate(dentry); + if (err) + goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } else { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + fuse_change_attributes(inode, &o->attr, + entry_attr_timeout(o), + attr_version); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + dput(dentry); + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + if (fc->readdirplus_auto) + set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + dput(dentry); + return err; } -static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + struct dir_context *ctx, u64 attr_version) { - int err; + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. + */ + over = !dir_emit(ctx, dirent->name, dirent->namelen, + dirent->ino, dirent->type); + ctx->pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + +static int fuse_readdir(struct file *file, struct dir_context *ctx) +{ + int plus, err; size_t nbytes; struct page *page; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req(fc, 1); + if (IS_ERR(req)) + return PTR_ERR(req); page = alloc_page(GFP_KERNEL); if (!page) { fuse_put_request(fc, req); return -ENOMEM; } + + plus = fuse_use_readdirplus(inode, ctx); + req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE); + req->page_descs[0].length = PAGE_SIZE; + if (plus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, + FUSE_READDIR); + } + fuse_request_send(fc, req); + nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (plus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, ctx, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + ctx); + } + } __free_page(page); - fuse_invalidate_attr(inode); /* atime changed */ + fuse_invalidate_atime(inode); return err; } @@ -809,11 +1456,11 @@ static char *read_link(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); char *link; - if (!req) - return ERR_PTR(-EINTR); + if (IS_ERR(req)) + return ERR_CAST(req); link = (char *) __get_free_page(GFP_KERNEL); if (!link) { @@ -822,12 +1469,11 @@ static char *read_link(struct dentry *dentry) } req->in.h.opcode = FUSE_READLINK; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->out.argvar = 1; req->out.numargs = 1; req->out.args[0].size = PAGE_SIZE - 1; req->out.args[0].value = link; - request_send(fc, req); + fuse_request_send(fc, req); if (req->out.h.error) { free_page((unsigned long) link); link = ERR_PTR(req->out.h.error); @@ -835,7 +1481,7 @@ static char *read_link(struct dentry *dentry) link[req->out.args[0].size] = '\0'; out: fuse_put_request(fc, req); - fuse_invalidate_attr(inode); /* atime changed */ + fuse_invalidate_atime(inode); return link; } @@ -858,129 +1504,343 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) static int fuse_dir_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 1); + return fuse_open_common(inode, file, true); } static int fuse_dir_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 1); + fuse_release_common(file, FUSE_RELEASEDIR); + + return 0; +} + +static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return fuse_fsync_common(file, start, end, datasync, 1); +} + +static long fuse_dir_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); + + /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ + if (fc->minor < 18) + return -ENOTTY; + + return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); +} + +static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); + + if (fc->minor < 18) + return -ENOTTY; + + return fuse_ioctl_common(file, cmd, arg, + FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); } -static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) +static bool update_mtime(unsigned ivalid, bool trust_local_mtime) { - /* nfsd can call this with no file */ - return file ? fuse_fsync_common(file, de, datasync, 1) : 0; + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + + /* Or if kernel i_mtime is the official one */ + if (trust_local_mtime) + return true; + + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ + if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) + return false; + + /* In all other cases update */ + return true; } -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) +static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, + bool trust_local_cmtime) { unsigned ivalid = iattr->ia_valid; if (ivalid & ATTR_MODE) arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; if (ivalid & ATTR_UID) - arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; + arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid); if (ivalid & ATTR_GID) - arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; + arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid); if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; - /* You can only _set_ these together (they may change by themselves) */ - if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { - arg->valid |= FATTR_ATIME | FATTR_MTIME; + if (ivalid & ATTR_ATIME) { + arg->valid |= FATTR_ATIME; arg->atime = iattr->ia_atime.tv_sec; + arg->atimensec = iattr->ia_atime.tv_nsec; + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; + } + if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { + arg->valid |= FATTR_MTIME; arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; + if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) + arg->valid |= FATTR_MTIME_NOW; } - if (ivalid & ATTR_FILE) { - struct fuse_file *ff = iattr->ia_file->private_data; - arg->valid |= FATTR_FH; - arg->fh = ff->fh; + if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { + arg->valid |= FATTR_CTIME; + arg->ctime = iattr->ia_ctime.tv_sec; + arg->ctimensec = iattr->ia_ctime.tv_nsec; } } /* + * Prevent concurrent writepages on inode + * + * This is done by adding a negative bias to the inode write counter + * and waiting for all pending writes to finish. + */ +void fuse_set_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(!mutex_is_locked(&inode->i_mutex)); + + spin_lock(&fc->lock); + BUG_ON(fi->writectr < 0); + fi->writectr += FUSE_NOWRITE; + spin_unlock(&fc->lock); + wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); +} + +/* + * Allow writepages on inode + * + * Remove the bias from the writecounter and send any queued + * writepages. + */ +static void __fuse_release_nowrite(struct inode *inode) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(fi->writectr != FUSE_NOWRITE); + fi->writectr = 0; + fuse_flush_writepages(inode); +} + +void fuse_release_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + spin_lock(&fc->lock); + __fuse_release_nowrite(inode); + spin_unlock(&fc->lock); +} + +static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, + struct inode *inode, + struct fuse_setattr_in *inarg_p, + struct fuse_attr_out *outarg_p) +{ + req->in.h.opcode = FUSE_SETATTR; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(*inarg_p); + req->in.args[0].value = inarg_p; + req->out.numargs = 1; + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + else + req->out.args[0].size = sizeof(*outarg_p); + req->out.args[0].value = outarg_p; +} + +/* + * Flush inode->i_mtime to the server + */ +int fuse_flush_times(struct inode *inode, struct fuse_file *ff) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_setattr_in inarg; + struct fuse_attr_out outarg; + int err; + + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); + + inarg.valid = FATTR_MTIME; + inarg.mtime = inode->i_mtime.tv_sec; + inarg.mtimensec = inode->i_mtime.tv_nsec; + if (fc->minor >= 23) { + inarg.valid |= FATTR_CTIME; + inarg.ctime = inode->i_ctime.tv_sec; + inarg.ctimensec = inode->i_ctime.tv_nsec; + } + if (ff) { + inarg.valid |= FATTR_FH; + inarg.fh = ff->fh; + } + fuse_setattr_fill(fc, req, inode, &inarg, &outarg); + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + + return err; +} + +/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. - * vmtruncate() doesn't allow for this case. So do the rlimit - * checking by hand and call vmtruncate() only after the file has - * actually been truncated. + * vmtruncate() doesn't allow for this case, so do the rlimit checking + * and the actual truncation by hand. */ -static int fuse_setattr(struct dentry *entry, struct iattr *attr) +int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file) { - struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; + bool is_truncate = false; + bool is_wb = fc->writeback_cache; + loff_t oldsize; int err; - int is_truncate = 0; + bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); - if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { - err = inode_change_ok(inode, attr); - if (err) - return err; - } + if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) + attr->ia_valid |= ATTR_FORCE; - if (attr->ia_valid & ATTR_SIZE) { - unsigned long limit; - is_truncate = 1; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (attr->ia_valid & ATTR_OPEN) { + if (fc->atomic_o_trunc) + return 0; + file = NULL; } - req = fuse_get_request(fc); - if (!req) - return -EINTR; + if (attr->ia_valid & ATTR_SIZE) + is_truncate = true; + + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + if (is_truncate) { + fuse_set_nowrite(inode); + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + if (trust_local_cmtime && attr->ia_size != inode->i_size) + attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; + } memset(&inarg, 0, sizeof(inarg)); - iattr_to_fattr(attr, &inarg); - req->in.h.opcode = FUSE_SETATTR; - req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - request_send(fc, req); + memset(&outarg, 0, sizeof(outarg)); + iattr_to_fattr(attr, &inarg, trust_local_cmtime); + if (file) { + struct fuse_file *ff = file->private_data; + inarg.valid |= FATTR_FH; + inarg.fh = ff->fh; + } + if (attr->ia_valid & ATTR_SIZE) { + /* For mandatory locking in truncate */ + inarg.valid |= FATTR_LOCKOWNER; + inarg.lock_owner = fuse_lock_owner_id(fc, current->files); + } + fuse_setattr_fill(fc, req, inode, &inarg, &outarg); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); - if (!err) { - if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); - err = -EIO; - } else { - if (is_truncate) { - loff_t origsize = i_size_read(inode); - i_size_write(inode, outarg.attr.size); - if (origsize > outarg.attr.size) - vmtruncate(inode, outarg.attr.size); - } - fuse_change_attributes(inode, &outarg.attr); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); - } - } else if (err == -EINTR) - fuse_invalidate_attr(inode); + if (err) { + if (err == -EINTR) + fuse_invalidate_attr(inode); + goto error; + } + + if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + make_bad_inode(inode); + err = -EIO; + goto error; + } + spin_lock(&fc->lock); + /* the kernel maintains i_mtime locally */ + if (trust_local_cmtime) { + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; + /* FIXME: clear I_DIRTY_SYNC? */ + } + + fuse_change_attributes_common(inode, &outarg.attr, + attr_timeout(&outarg)); + oldsize = inode->i_size; + /* see the comment in fuse_change_attributes() */ + if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) + i_size_write(inode, outarg.attr.size); + + if (is_truncate) { + /* NOTE: this may release/reacquire fc->lock */ + __fuse_release_nowrite(inode); + } + spin_unlock(&fc->lock); + + /* + * Only call invalidate_inode_pages2() after removing + * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. + */ + if ((is_truncate || !is_wb) && + S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { + truncate_pagecache(inode, outarg.attr.size); + invalidate_inode_pages2(inode->i_mapping); + } + + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + return 0; + +error: + if (is_truncate) + fuse_release_nowrite(inode); + + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; } +static int fuse_setattr(struct dentry *entry, struct iattr *attr) +{ + struct inode *inode = entry->d_inode; + + if (!fuse_allow_current_process(get_fuse_conn(inode))) + return -EACCES; + + if (attr->ia_valid & ATTR_FILE) + return fuse_do_setattr(inode, attr, attr->ia_file); + else + return fuse_do_setattr(inode, attr, NULL); +} + static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, struct kstat *stat) { struct inode *inode = entry->d_inode; - int err = fuse_revalidate(entry); - if (!err) - generic_fillattr(inode, stat); + struct fuse_conn *fc = get_fuse_conn(inode); - return err; + if (!fuse_allow_current_process(fc)) + return -EACCES; + + return fuse_update_attributes(inode, stat, NULL, NULL); } static int fuse_setxattr(struct dentry *entry, const char *name, @@ -995,16 +1855,15 @@ static int fuse_setxattr(struct dentry *entry, const char *name, if (fc->no_setxattr) return -EOPNOTSUPP; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); inarg.size = size; inarg.flags = flags; req->in.h.opcode = FUSE_SETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 3; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1012,13 +1871,17 @@ static int fuse_setxattr(struct dentry *entry, const char *name, req->in.args[1].value = name; req->in.args[2].size = size; req->in.args[2].value = value; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { fc->no_setxattr = 1; err = -EOPNOTSUPP; } + if (!err) { + fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } return err; } @@ -1035,15 +1898,14 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, if (fc->no_getxattr) return -EOPNOTSUPP; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); inarg.size = size; req->in.h.opcode = FUSE_GETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1059,7 +1921,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; } - request_send(fc, req); + fuse_request_send(fc, req); ret = req->out.h.error; if (!ret) ret = size ? req->out.args[0].size : outarg.size; @@ -1082,18 +1944,20 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (!fuse_allow_current_process(fc)) + return -EACCES; + if (fc->no_listxattr) return -EOPNOTSUPP; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); inarg.size = size; req->in.h.opcode = FUSE_LISTXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1107,7 +1971,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; } - request_send(fc, req); + fuse_request_send(fc, req); ret = req->out.h.error; if (!ret) ret = size ? req->out.args[0].size : outarg.size; @@ -1131,36 +1995,41 @@ static int fuse_removexattr(struct dentry *entry, const char *name) if (fc->no_removexattr) return -EOPNOTSUPP; - req = fuse_get_request(fc); - if (!req) - return -EINTR; + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); req->in.h.opcode = FUSE_REMOVEXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = strlen(name) + 1; req->in.args[0].value = name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { fc->no_removexattr = 1; err = -EOPNOTSUPP; } + if (!err) { + fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } return err; } -static struct inode_operations fuse_dir_inode_operations = { +static const struct inode_operations fuse_dir_inode_operations = { .lookup = fuse_lookup, .mkdir = fuse_mkdir, .symlink = fuse_symlink, .unlink = fuse_unlink, .rmdir = fuse_rmdir, .rename = fuse_rename, + .rename2 = fuse_rename2, .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, + .atomic_open = fuse_atomic_open, .mknod = fuse_mknod, .permission = fuse_permission, .getattr = fuse_getattr, @@ -1170,16 +2039,18 @@ static struct inode_operations fuse_dir_inode_operations = { .removexattr = fuse_removexattr, }; -static struct file_operations fuse_dir_operations = { +static const struct file_operations fuse_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = fuse_readdir, + .iterate = fuse_readdir, .open = fuse_dir_open, .release = fuse_dir_release, .fsync = fuse_dir_fsync, + .unlocked_ioctl = fuse_dir_ioctl, + .compat_ioctl = fuse_dir_compat_ioctl, }; -static struct inode_operations fuse_common_inode_operations = { +static const struct inode_operations fuse_common_inode_operations = { .setattr = fuse_setattr, .permission = fuse_permission, .getattr = fuse_getattr, @@ -1189,7 +2060,7 @@ static struct inode_operations fuse_common_inode_operations = { .removexattr = fuse_removexattr, }; -static struct inode_operations fuse_symlink_inode_operations = { +static const struct inode_operations fuse_symlink_inode_operations = { .setattr = fuse_setattr, .follow_link = fuse_follow_link, .put_link = fuse_put_link, |
