From 9cfa1098dcfb34f71c5f3b7bcdbbb435a0cecab2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:26:18 -0700 Subject: ceph: use flag bit for at_end readdir flag This saves us a word of memory per file. Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ef8f08c343e..53b441fe78f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -133,7 +133,7 @@ more: d_unhashed(dentry) ? "!hashed" : "hashed", parent->d_subdirs.prev, parent->d_subdirs.next); if (p == &parent->d_subdirs) { - fi->at_end = 1; + fi->flags |= CEPH_F_ATEND; goto out_unlock; } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -234,7 +234,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) const int max_bytes = fsc->mount_options->max_readdir_bytes; dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); - if (fi->at_end) + if (fi->flags & CEPH_F_ATEND) return 0; /* always start with . and .. 
*/ @@ -403,7 +403,7 @@ more: dout("readdir next frag is %x\n", frag); goto more; } - fi->at_end = 1; + fi->flags |= CEPH_F_ATEND; /* * if dir_release_count still matches the dir, no dentries @@ -435,7 +435,7 @@ static void reset_readdir(struct ceph_file_info *fi) dput(fi->dentry); fi->dentry = NULL; } - fi->at_end = 0; + fi->flags &= ~CEPH_F_ATEND; } static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) @@ -458,7 +458,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; - fi->at_end = 0; + fi->flags &= ~CEPH_F_ATEND; } retval = offset; -- cgit v1.2.3-18-g5258 From 468640e32c7f6bfdaaa011095cc388786755d159 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:28:11 -0700 Subject: ceph: fix ceph_lookup_open intent usage We weren't properly calling lookup_instantiate_filp when setting up the lookup intent, which could lead to file leakage on errors. So: - use separate helper for the hidden snapdir translation, immediately following the mds request - use ceph_finish_lookup for the final dentry/return value dance in the exit path - lookup_instantiate_filp on success Reported-by: Al Viro Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 53b441fe78f..f39a409db0e 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -482,18 +482,10 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) } /* - * Process result of a lookup/open request. - * - * Mainly, make sure we return the final req->r_dentry (if it already - * existed) in place of the original VFS-provided dentry when they - * differ. 
- * - * Gracefully handle the case where the MDS replies with -ENOENT and - * no trace (which it may do, at its discretion, e.g., if it doesn't - * care to issue a lease on the negative dentry). + * Handle lookups for the hidden .snap directory. */ -struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, - struct dentry *dentry, int err) +int ceph_handle_snapdir(struct ceph_mds_request *req, + struct dentry *dentry, int err) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct inode *parent = dentry->d_parent->d_inode; @@ -510,7 +502,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, d_add(dentry, inode); err = 0; } + return err; +} +/* + * Figure out final result of a lookup/open request. + * + * Mainly, make sure we return the final req->r_dentry (if it already + * existed) in place of the original VFS-provided dentry when they + * differ. + * + * Gracefully handle the case where the MDS replies with -ENOENT and + * no trace (which it may do, at its discretion, e.g., if it doesn't + * care to issue a lease on the negative dentry). + */ +struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, + struct dentry *dentry, int err) +{ if (err == -ENOENT) { /* no trace? */ err = 0; @@ -605,6 +613,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_locked_dir = dir; err = ceph_mdsc_do_request(mdsc, NULL, req); + err = ceph_handle_snapdir(req, dentry, err); dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ dout("lookup result=%p\n", dentry); -- cgit v1.2.3-18-g5258 From 48d0cbd1242aac969560ef8b90f26ee3b09a6a5c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:30:15 -0700 Subject: ceph: handle racing calls to ceph_init_dentry The ->lookup() and prepopulate_readdir() callers are working with unhashed dentries, so we don't have to worry. 
The export.c callers, though, need to initialize something they got back from d_obtain_alias() and are potentially racing with other callers. Make sure we don't return unless the dentry is properly initialized (by us or someone else). Reported-by: Al Viro Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f39a409db0e..883c9546111 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ - ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - d_set_d_op(dentry, &ceph_dentry_ops); - else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - d_set_d_op(dentry, &ceph_snapdir_dentry_ops); - else - d_set_d_op(dentry, &ceph_snap_dentry_ops); - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -58,10 +50,21 @@ int ceph_init_dentry(struct dentry *dentry) kmem_cache_free(ceph_dentry_cachep, di); goto out_unlock; } + + if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ + ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + d_set_d_op(dentry, &ceph_dentry_ops); + else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); + else + d_set_d_op(dentry, &ceph_snap_dentry_ops); + di->dentry = dentry; di->lease_session = NULL; - dentry->d_fsdata = di; dentry->d_time = jiffies; + /* avoid reordering d_fsdata setup so that the check above is safe */ + smp_mb(); + dentry->d_fsdata = di; ceph_dentry_lru_add(dentry); out_unlock: spin_unlock(&dentry->d_lock); -- cgit v1.2.3-18-g5258 From 5f21c96dd5c615341963036ae8f5e4f5227a818d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:30:29 -0700 Subject: ceph: protect access to d_parent d_parent is 
protected by d_lock: use it when looking up a dentry's parent directory inode. Also take a reference and drop it in the caller to avoid a use-after-free. Reported-by: Al Viro Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 883c9546111..ed296ec121d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -71,6 +71,21 @@ out_unlock: return 0; } +struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) +{ + struct inode *inode = NULL; + + if (!dentry) + return NULL; + + spin_lock(&dentry->d_lock); + if (dentry->d_parent) { + inode = dentry->d_parent->d_inode; + ihold(inode); + } + spin_unlock(&dentry->d_lock); + return inode; +} /* -- cgit v1.2.3-18-g5258 From bf1c6aca96c9d2f117dc7e590c2bc2304e7febe1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:30:43 -0700 Subject: ceph: protect d_parent access in ceph_d_revalidate Protect d_parent with d_lock. Carry a reference. Simplify the flow so that there is a single exit point and cleanup. 
Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ed296ec121d..31d27f8f826 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1024,36 +1024,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) */ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { + int valid = 0; struct inode *dir; if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; - dir = dentry->d_parent->d_inode; - dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode, ceph_dentry(dentry)->offset); + dir = ceph_get_dentry_parent_inode(dentry); + /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - goto out_touch; + valid = 1; + } else if (dentry->d_inode && + ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { + valid = 1; + } else if (dentry_lease_is_valid(dentry) || + dir_lease_is_valid(dir, dentry)) { + valid = 1; } - if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) - goto out_touch; - if (dentry_lease_is_valid(dentry) || - dir_lease_is_valid(dir, dentry)) - goto out_touch; - - dout("d_revalidate %p invalid\n", dentry); - d_drop(dentry); - return 0; -out_touch: - ceph_dentry_lru_touch(dentry); - return 1; + dout("d_revalidate %p %s\n", dentry, valid ? 
"valid" : "invalid"); + if (valid) + ceph_dentry_lru_touch(dentry); + else + d_drop(dentry); + iput(dir); + return valid; } /* -- cgit v1.2.3-18-g5258 From e5f86dc377e7ff2b4195831153a85a3e76fefff2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:30:55 -0700 Subject: ceph: avoid d_parent in ceph_dentry_hash; fix ceph_encode_fh() hashing bug Have caller pass in a safely-obtained reference to the parent directory for calculating a dentry's hash value. While we're here, simplify the flow through ceph_encode_fh() so that there is a single exit point and cleanup. Also fix a bug with the dentry hash calculation: calculate the hash for the dentry we were given, not its parent. Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 31d27f8f826..33a19df7228 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1244,9 +1244,8 @@ void ceph_dentry_lru_del(struct dentry *dn) * Return name hash for a given dentry. This is dependent on * the parent directory's hash function. */ -unsigned ceph_dentry_hash(struct dentry *dn) +unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { - struct inode *dir = dn->d_parent->d_inode; struct ceph_inode_info *dci = ceph_inode(dir); switch (dci->i_dir_layout.dl_dir_hash) { -- cgit v1.2.3-18-g5258 From 41b02e1f9bb87b07d792b64aaeb7af3d00d69cd2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:31:14 -0700 Subject: ceph: explicitly reference rename old_dentry parent dir in request We carry a pin on the parent directory for the rename source and dest dentries. For the source it's r_locked_dir; we need to explicitly reference the old_dentry parent as well, since the dentry's d_parent may change between when the request was created and pinned and when it is freed. 
Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 33a19df7228..7263f825d42 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -811,6 +811,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ + req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); req->r_locked_dir = dir; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; @@ -909,6 +910,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_dentry = dget(new_dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); + req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); req->r_locked_dir = new_dir; req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; -- cgit v1.2.3-18-g5258 From d79698da32b317e96216236f265a9b72b78ae568 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jul 2011 11:31:26 -0700 Subject: ceph: document unlocked d_parent accesses For the most part we don't care about racing with rename when directing MDS requests; either the old or new parent is fine. Document that, and do some minor cleanup. Reviewed-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph/dir.c') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 7263f825d42..852ff8600ac 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -506,7 +506,7 @@ int ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry, int err) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct inode *parent = dentry->d_parent->d_inode; + struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */ /* .snap dir? 
*/ if (err == -ENOENT && -- cgit v1.2.3-18-g5258