aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@citi.umich.edu>2010-06-08 20:05:18 -0400
committerJ. Bruce Fields <bfields@citi.umich.edu>2010-06-08 20:05:18 -0400
commit44b56603c4c476b845a824cff6fe905c6268b2a1 (patch)
treeb7e792414fef2390718a657765719fbbb529ce84 /fs
parentc3935e30495869dd611e1cd62253c94ebc7c6c04 (diff)
parentb160fdabe93a8a53094f90f02bf4dcb500782aab (diff)
Merge branch 'for-2.6.34-incoming' into for-2.6.35-incoming
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c5
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c98
-rw-r--r--fs/cachefiles/security.c4
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.h2
-rw-r--r--fs/ceph/auth_x.c32
-rw-r--r--fs/ceph/caps.c21
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c8
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/messenger.c39
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c26
-rw-r--r--fs/ceph/osd_client.h3
-rw-r--r--fs/ceph/osdmap.c29
-rw-r--r--fs/ceph/osdmap.h2
-rw-r--r--fs/ceph/rados.h1
-rw-r--r--fs/ceph/snap.c24
-rw-r--r--fs/ceph/super.c30
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/inode.c21
-rw-r--r--fs/compat.c2
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/jfs/super.c13
-rw-r--r--fs/logfs/super.c14
-rw-r--r--fs/namei.c27
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfs/delegation.c86
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/super.c1
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c16
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c5
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/file.c32
-rw-r--r--fs/ocfs2/inode.c68
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/namei.c58
-rw-r--r--fs/ocfs2/refcounttree.c3
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/task_mmu.c19
-rw-r--r--fs/sysv/dir.c2
50 files changed, 518 insertions, 276 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d9..e8e5e63ac95 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
}
/* Trigger mount for path component or follow link */
} else if (ino->flags & AUTOFS_INF_PENDING ||
- autofs4_need_mount(flags) ||
- current->link_count) {
+ autofs4_need_mount(flags)) {
DPRINTK("waiting for mount name=%.*s",
dentry->d_name.len, dentry->d_name.name);
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
- status = try_to_fill_dentry(dentry, 0);
+ status = try_to_fill_dentry(dentry, nd->flags);
if (status)
goto out_error;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe3..97a97839a86 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
ret = -EBADF;
goto out_drop_write;
}
+
src = src_file->f_dentry->d_inode;
ret = -EINVAL;
if (src == inode)
goto out_fput;
+ /* the src must be open for reading */
+ if (!(src_file->f_mode & FMODE_READ))
+ goto out_fput;
+
ret = -EISDIR;
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
goto out_fput;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c62..a8cd821226d 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
loff_t i_size; /* object size */
unsigned long flags;
#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
+#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
atomic_t usage; /* object usage count */
uint8_t type; /* object type */
uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0..f4a7840bf42 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
}
/*
+ * mark the owner of a dentry, if there is one, to indicate that that dentry
+ * has been preemptively deleted
+ * - the caller must hold the i_mutex on the dentry's parent as required to
+ * call vfs_unlink(), vfs_rmdir() or vfs_rename()
+ */
+static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
+ struct dentry *dentry)
+{
+ struct cachefiles_object *object;
+ struct rb_node *p;
+
+ _enter(",'%*.*s'",
+ dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+
+ write_lock(&cache->active_lock);
+
+ p = cache->active_nodes.rb_node;
+ while (p) {
+ object = rb_entry(p, struct cachefiles_object, active_node);
+ if (object->dentry > dentry)
+ p = p->rb_left;
+ else if (object->dentry < dentry)
+ p = p->rb_right;
+ else
+ goto found_dentry;
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [no owner]");
+ return;
+
+ /* found the object that owns this dentry */
+found_dentry:
+ kdebug("preemptive burial: OBJ%x [%s] %p",
+ object->fscache.debug_id,
+ fscache_object_states[object->fscache.state],
+ dentry);
+
+ if (object->fscache.state < FSCACHE_OBJECT_DYING) {
+ printk(KERN_ERR "\n");
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Can't preemptively bury live object\n");
+ cachefiles_printk_object(object, NULL);
+ } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Object already preemptively buried\n");
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [owner marked]");
+}
+
+/*
* record the fact that an object is now active
*/
static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
*/
static int cachefiles_bury_object(struct cachefiles_cache *cache,
struct dentry *dir,
- struct dentry *rep)
+ struct dentry *rep,
+ bool preemptive)
{
struct dentry *grave, *trap;
char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
dir->d_name.len, dir->d_name.len, dir->d_name.name,
rep->d_name.len, rep->d_name.len, rep->d_name.name);
+ _debug("remove %p from %p", rep, dir);
+
/* non-directories can just be unlinked */
if (!S_ISDIR(rep->d_inode->i_mode)) {
_debug("unlink stale object");
ret = vfs_unlink(dir->d_inode, rep);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
mutex_unlock(&dir->d_inode->i_mutex);
if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
unlock_rename(cache->graveyard, dir);
dput(grave);
_leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
struct dentry *dir;
int ret;
- _enter(",{%p}", object->dentry);
+ _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
ASSERT(object->dentry);
ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- /* we need to check that our parent is _still_ our parent - it may have
- * been renamed */
- if (dir == object->dentry->d_parent) {
- ret = cachefiles_bury_object(cache, dir, object->dentry);
- } else {
- /* it got moved, presumably by cachefilesd culling it, so it's
- * no longer in the key path and we can ignore it */
+ if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ /* object allocation for the same key preemptively deleted this
+ * object's file so that it could create its own file */
+ _debug("object preemptively buried");
mutex_unlock(&dir->d_inode->i_mutex);
ret = 0;
+ } else {
+ /* we need to check that our parent is _still_ our parent - it
+ * may have been renamed */
+ if (dir == object->dentry->d_parent) {
+ ret = cachefiles_bury_object(cache, dir,
+ object->dentry, false);
+ } else {
+ /* it got moved, presumably by cachefilesd culling it,
+ * so it's no longer in the key path and we can ignore
+ * it */
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = 0;
+ }
}
dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
const char *name;
int ret, nlen;
- _enter("{%p},,%s,", parent->dentry, key);
+ _enter("OBJ%x{%p},OBJ%x,%s,",
+ parent->fscache.debug_id, parent->dentry,
+ object->fscache.debug_id, key);
cache = container_of(parent->fscache.cache,
struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
* mutex) */
object->dentry = NULL;
- ret = cachefiles_bury_object(cache, dir, next);
+ ret = cachefiles_bury_object(cache, dir, next, true);
dput(next);
next = NULL;
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
/* actually remove the victim (drops the dir mutex) */
_debug("bury");
- ret = cachefiles_bury_object(cache, dir, victim);
+ ret = cachefiles_bury_object(cache, dir, victim, false);
if (ret < 0)
goto error;
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb223..039b5011d83 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
/*
* check the security details of the on-disk cache
* - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ * error
*/
int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
* which create files */
ret = set_create_files_as(new, root->d_inode);
if (ret < 0) {
+ abort_creds(new);
+ cachefiles_begin_secure(cache, _saved_cred);
_leave(" = %d [cfa]", ret);
return ret;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 412593703d1..a9005d862ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
int i;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- struct writeback_control *wbc = req->r_wbc;
__s32 rc = -EIO;
u64 bytes = 0;
struct ceph_client *client = ceph_inode_to_client(inode);
long writeback_stat;
- unsigned issued = __ceph_caps_issued(ci, NULL);
+ unsigned issued = ceph_caps_issued(ci);
/* parse reply */
replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
clear_bdi_congested(&client->backing_dev_info,
BLK_RW_ASYNC);
- if (i >= wrote) {
- dout("inode %p skipping page %p\n", inode, page);
- wbc->pages_skipped++;
- }
ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
alloc_page_vec(client, req);
req->r_callback = writepages_finish;
req->r_inode = inode;
- req->r_wbc = wbc;
}
/* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b86..818afe72e6c 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/slab.h>
#include "types.h"
#include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31..8164df1a08b 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
#ifndef _FS_CEPH_AUTH_NONE_H
#define _FS_CEPH_AUTH_NONE_H
+#include <linux/slab.h>
+
#include "auth.h"
/*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8c..fee5a08da88 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
#include "auth.h"
#include "decode.h"
-struct kmem_cache *ceph_x_ticketbuf_cachep;
-
#define TEMP_TICKET_BUF_LEN 256
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
char *ticket_buf;
u8 struct_v;
- dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+ dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!dbuf)
return -ENOMEM;
ret = -ENOMEM;
- ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
- GFP_NOFS | GFP_ATOMIC);
+ ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!ticket_buf)
goto out_dbuf;
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
ret = 0;
out:
- kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+ kfree(ticket_buf);
out_dbuf:
- kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+ kfree(dbuf);
return ret;
bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
- kmem_cache_destroy(ceph_x_ticketbuf_cachep);
-
kfree(ac->private);
ac->private = NULL;
}
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
int ret;
dout("ceph_x_init %p\n", ac);
+ ret = -ENOMEM;
xi = kzalloc(sizeof(*xi), GFP_NOFS);
if (!xi)
- return -ENOMEM;
+ goto out;
- ret = -ENOMEM;
- ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
- TEMP_TICKET_BUF_LEN, 8,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
- NULL);
- if (!ceph_x_ticketbuf_cachep)
- goto done_nomem;
ret = -EINVAL;
if (!ac->secret) {
pr_err("no secret set (for auth_x protocol)\n");
- goto done_nomem;
+ goto out_nomem;
}
ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
if (ret)
- goto done_nomem;
+ goto out_nomem;
xi->starting = true;
xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
ac->ops = &ceph_x_ops;
return 0;
-done_nomem:
+out_nomem:
kfree(xi);
- if (ceph_x_ticketbuf_cachep)
- kmem_cache_destroy(ceph_x_ticketbuf_cachep);
+out:
return ret;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239fa9a3..d9400534b27 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
}
/*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
* caller should hold i_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
- /* remove from inode list */
- rb_erase(&cap->ci_node, &ci->i_caps);
- cap->ci = NULL;
- if (ci->i_auth_cap == cap)
- ci->i_auth_cap = NULL;
-
/* remove from session list */
spin_lock(&session->s_cap_lock);
if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
list_del_init(&cap->session_caps);
session->s_nr_caps--;
cap->session = NULL;
+ removed = 1;
}
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
spin_unlock(&session->s_cap_lock);
- if (cap->session == NULL)
+ /* remove from inode list */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
+ if (removed)
ceph_put_cap(cap);
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
- spin_unlock(&inode->i_lock);
}
+ spin_unlock(&inode->i_lock);
}
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e526a..650d2db5ed2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
* do_request, above). If there is no trace, we need
* to do it here.
*/
+
+ /* d_move screws up d_subdirs order */
+ ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
d_move(old_dentry, new_dentry);
+
+ /* ensure target dentry is invalidated, despite
+ rehashing bug in vfs_rename_dir */
+ new_dentry->d_time = jiffies;
+ ceph_dentry(new_dentry)->lease_shared_gen = 0;
}
ceph_mdsc_put_request(req);
return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c..ed6f19721d6 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
* throw out any page cache pages in this range. this
* may block.
*/
- truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE-1));
} else {
pages = alloc_page_vector(num_pages);
if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c275e..85b4d2ffdeb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
__ceph_get_fmode(ci, cap_fmode);
spin_unlock(&inode->i_lock);
}
+ } else if (cap_fmode >= 0) {
+ pr_warning("mds issued no caps on %llx.%llx\n",
+ ceph_vinop(inode));
+ __ceph_get_fmode(ci, cap_fmode);
}
/* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dn, dn->d_name.len, dn->d_name.name);
dout("fill_trace doing d_move %p -> %p\n",
req->r_old_dentry, dn);
+
+ /* d_move screws up d_subdirs order */
+ ceph_i_clear(dir, CEPH_I_COMPLETE);
+
d_move(req->r_old_dentry, dn);
dout(" src %p '%.*s' dst %p '%.*s'\n",
req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47b..24561a557e0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
}
/*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
*
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
*/
static int iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
struct ceph_mds_session *session = NULL;
struct ceph_msg *reply;
struct rb_node *p;
- int err;
+ int err = -ENOMEM;
struct ceph_pagelist *pagelist;
pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
goto fail;
err = iterate_session_caps(session, encode_caps_cb, pagelist);
if (err < 0)
- goto out;
+ goto fail;
/*
* snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply);
- if (session) {
- session->s_state = CEPH_MDS_SESSION_OPEN;
- __wake_requests(mdsc, &session->s_waiting);
- }
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ mutex_unlock(&session->s_mutex);
+
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+
+ ceph_put_mds_session(session);
-out:
up_read(&mdsc->snap_rwsem);
- if (session) {
- mutex_unlock(&session->s_mutex);
- ceph_put_mds_session(session);
- }
mutex_lock(&mdsc->mutex);
return;
fail:
ceph_msg_put(reply);
+ up_read(&mdsc->snap_rwsem);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
fail_nomsg:
ceph_pagelist_release(pagelist);
kfree(pagelist);
fail_nopagelist:
- pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
- goto out;
+ pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+ mutex_lock(&mdsc->mutex);
+ return;
}
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa131add..cd4fadb6491 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
list_move_tail(&m->list_head, &con->out_sent);
}
- m->hdr.seq = cpu_to_le64(++con->out_seq);
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with its original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
unsigned front_len, middle_len, data_len, data_off;
int datacrc = con->msgr->nocrc;
int skip;
+ u64 seq;
dout("read_partial_message con %p msg %p\n", con, m);
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
return -EIO;
data_off = le16_to_cpu(con->in_hdr.data_off);
+ /* verify seq# */
+ seq = le64_to_cpu(con->in_hdr.seq);
+ if ((s64)seq - (s64)con->in_seq < 1) {
+ pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+ ENTITY_NAME(con->peer_name),
+ pr_addr(&con->peer_addr.in_addr),
+ seq, con->in_seq + 1);
+ con->in_base_pos = -front_len - middle_len - data_len -
+ sizeof(m->footer);
+ con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
+ return 0;
+ } else if ((s64)seq - (s64)con->in_seq > 1) {
+ pr_err("read_partial_message bad seq %lld expected %lld\n",
+ seq, con->in_seq + 1);
+ con->error_msg = "bad message sequence # for incoming message";
+ return -EBADMSG;
+ }
+
/* allocate message? */
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
return 0;
}
if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+ msg->needs_out_seq = true;
+
/* queue */
mutex_lock(&con->mutex);
BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
} else {
dout("con_revoke_pages %p msg %p pages %p no-op\n",
con, con->in_msg, msg