aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dir.c6
-rw-r--r--fs/9p/vfs_inode.c9
-rw-r--r--fs/9p/vfs_super.c20
-rw-r--r--fs/aio.c13
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/bio-integrity.c4
-rw-r--r--fs/ceph/Kconfig1
-rw-r--r--fs/ceph/addr.c7
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/dir.c10
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/pagelist.c12
-rw-r--r--fs/ceph/snap.c92
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/char_dev.c4
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/asn1.c6
-rw-r--r--fs/cifs/cifsencrypt.c418
-rw-r--r--fs/cifs/cifsglob.h25
-rw-r--r--fs/cifs/cifspdu.h7
-rw-r--r--fs/cifs/cifsproto.h13
-rw-r--r--fs/cifs/cifssmb.c62
-rw-r--r--fs/cifs/connect.c77
-rw-r--r--fs/cifs/inode.c32
-rw-r--r--fs/cifs/netmisc.c22
-rw-r--r--fs/cifs/ntlmssp.h13
-rw-r--r--fs/cifs/sess.c132
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/psdev.c4
-rw-r--r--fs/compat.c2
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/exec.c14
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fs-writeback.c14
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/ocfs2/acl.c3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c532
-rw-r--r--fs/ocfs2/cluster/heartbeat.h4
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/cluster/nodemanager.c5
-rw-r--r--fs/ocfs2/cluster/ocfs2_nodemanager.h6
-rw-r--r--fs/ocfs2/cluster/tcp.c7
-rw-r--r--fs/ocfs2/dir.c24
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h30
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c13
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c401
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlmglue.h1
-rw-r--r--fs/ocfs2/ocfs2.h40
-rw-r--r--fs/ocfs2/ocfs2_fs.h78
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h8
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/reservations.c22
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c59
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/reiserfs/ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_log_cil.c12
-rw-r--r--fs/xfs/xfs_log_priv.h37
75 files changed, 1612 insertions, 864 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 16c8a2a98c1..899f168fd19 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS,
- "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
+ "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
+ inode, filp, fid ? fid->fid : -1);
filemap_write_and_wait(inode->i_mapping);
- p9_client_clunk(fid);
+ if (fid)
+ p9_client_clunk(fid);
return 0;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c7c23eab944..9e670d52764 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
- dentry->d_op = &v9fs_cached_dentry_operations;
+ if (v9ses->cache)
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ else
+ dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -1128,6 +1131,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
generic_fillattr(dentry->d_inode, stat);
+ p9stat_free(st);
kfree(st);
return 0;
}
@@ -1489,6 +1493,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
retval = strnlen(buffer, buflen);
done:
+ p9stat_free(st);
kfree(st);
return retval;
}
@@ -1942,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.unlink = v9fs_vfs_unlink,
.mkdir = v9fs_vfs_mkdir,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod_dotl,
+ .mknod = v9fs_vfs_mknod,
.rename = v9fs_vfs_rename,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f9311077de6..1d12ba0ed3d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
fid = v9fs_session_init(v9ses, dev_name, data);
if (IS_ERR(fid)) {
retval = PTR_ERR(fid);
+ /*
+ * we need to call session_close to tear down some
+ * of the data structure setup by session_init
+ */
goto close_session;
}
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
retval = -ENOMEM;
goto release_sb;
}
-
sb->s_root = root;
if (v9fs_proto_dotl(v9ses)) {
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
if (IS_ERR(st)) {
retval = PTR_ERR(st);
- goto clunk_fid;
+ goto release_sb;
}
v9fs_stat2inode_dotl(st, root->d_inode);
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
st = p9_client_stat(fid);
if (IS_ERR(st)) {
retval = PTR_ERR(st);
- goto clunk_fid;
+ goto release_sb;
}
root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
v9fs_fid_add(root, fid);
-P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
+ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
simple_set_mnt(mnt, sb);
return 0;
clunk_fid:
p9_client_clunk(fid);
-
close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
return retval;
-
release_sb:
+ /*
+ * we will do the session_close and root dentry release
+ * in the below call. But we need to clunk fid, because we haven't
+ * attached the fid to dentry so it won't get clunked
+ * automatically.
+ */
+ p9_client_clunk(fid);
deactivate_locked_super(sb);
return retval;
}
diff --git a/fs/aio.c b/fs/aio.c
index 3006b5bc33d..250b0a73c8a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
*/
ret = retry(iocb);
- if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
+ if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+ /*
+ * There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+ ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+ ret = -EINTR;
aio_complete(iocb, ret, 0);
+ }
out:
spin_lock_irq(&ctx->ctx_lock);
@@ -1659,6 +1667,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(nr < 0))
return -EINVAL;
+ if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
+ nr = LONG_MAX/sizeof(*iocbpp);
+
if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
return -EFAULT;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7528b91393..fd0cc0bf9a4 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
if (!err) {
- err = register_binfmt(&misc_format);
+ err = insert_binfmt(&misc_format);
if (err)
unregister_filesystem(&bm_fs_type);
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c38d3c..4d0ff5ee27b 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio)
/* Allocate kernel buffer for protection data */
len = sectors * blk_integrity_tuple_size(bi);
- buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+ buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
if (unlikely(buf == NULL)) {
printk(KERN_ERR "could not allocate integrity buffer\n");
- return -EIO;
+ return -ENOMEM;
}
end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27..0fcd2640c23 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
depends on INET && EXPERIMENTAL
select LIBCRC32C
select CRYPTO_AES
+ select CRYPTO
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31f..efbc604001c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (i_size < page_off + len)
len = i_size - page_off;
- dout("writepage %p page %p index %lu on %llu~%u\n",
- inode, page, page->index, page_off, len);
+ dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+ inode, page, page->index, page_off, len, snapc);
writeback_stat = atomic_long_inc_return(&client->writeback_count);
if (writeback_stat >
@@ -766,7 +766,8 @@ get_more_pages:
/* ok */
if (locked_pages == 0) {
/* prepare async write request */
- offset = page->index << PAGE_CACHE_SHIFT;
+ offset = (unsigned long long)page->index
+ << PAGE_CACHE_SHIFT;
len = wsize;
req = ceph_osdc_new_request(&client->osdc,
&ci->i_layout,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680a..73c153092f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
used |= CEPH_CAP_PIN;
if (ci->i_rd_ref)
used |= CEPH_CAP_FILE_RD;
- if (ci->i_rdcache_ref || ci->i_rdcache_gen)
+ if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
used |= CEPH_CAP_FILE_CACHE;
if (ci->i_wr_ref)
used |= CEPH_CAP_FILE_WR;
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
* asynchronously back to the MDS once sync writes complete and dirty
* data is written out.
*
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
* Called under i_lock. Takes s_mutex as needed.
*/
void __ceph_flush_snaps(struct ceph_inode_info *ci,
- struct ceph_mds_session **psession)
+ struct ceph_mds_session **psession,
+ int again)
__releases(ci->vfs_inode->i_lock)
__acquires(ci->vfs_inode->i_lock)
{
@@ -1227,7 +1231,7 @@ retry:
* pages to be written out.
*/
if (capsnap->dirty_pages || capsnap->writing)
- continue;
+ break;
/*
* if cap writeback already occurred, we should have dropped
@@ -1240,6 +1244,13 @@ retry:
dout("no auth cap (migrating?), doing nothing\n");
goto out;
}
+
+ /* only flush each capsnap once */
+ if (!again && !list_empty(&capsnap->flushing_item)) {
+ dout("already flushed %p, skipping\n", capsnap);
+ continue;
+ }
+
mds = ci->i_auth_cap->session->s_mds;
mseq = ci->i_auth_cap->mseq;
@@ -1276,8 +1287,8 @@ retry:
&session->s_cap_snaps_flushing);
spin_unlock(&inode->i_lock);
- dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
- inode, capsnap, next_follows, capsnap->size);
+ dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+ inode, capsnap, capsnap->follows, capsnap->flush_tid);
send_cap_msg(session, ceph_vino(inode).ino, 0,
CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
struct inode *inode = &ci->vfs_inode;
spin_lock(&inode->i_lock);
- __ceph_flush_snaps(ci, NULL);
+ __ceph_flush_snaps(ci, NULL, 0);
spin_unlock(&inode->i_lock);
}
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
/* flush snaps first time around only */
if (!list_empty(&ci->i_cap_snaps))
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 0);
goto retry_locked;
retry:
spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
if (cap && cap->session == session) {
dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
cap, capsnap);
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 1);
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23e..a1986eb5204 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = dentry->d_parent->d_inode;
- u64 snapid = ceph_snap(parent_inode);
+ struct inode *parent_inode = NULL;
+ u64 snapid = CEPH_NOSNAP;
+ if (!IS_ROOT(dentry)) {
+ parent_inode = dentry->d_parent->d_inode;
+ if (parent_inode)
+ snapid = ceph_snap(parent_inode);
+ }
dout("dentry_release %p parent %p\n", dentry, parent_inode);
-
if (parent_inode && snapid != CEPH_SNAPDIR) {
struct ceph_inode_info *ci = ceph_inode(parent_inode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da0..62377ec37ed 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
* the caller) if we fail.
*/
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
- bool *prehash)
+ bool *prehash, bool set_offset)
{
struct dentry *realdn;
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
}
if ((!prehash || *prehash) && d_unhashed(dn))
d_rehash(dn);
- ceph_set_dentry_offset(dn);
+ if (set_offset)
+ ceph_set_dentry_offset(dn);
out:
return dn;
}
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
d_delete(dn);
goto done;
}
- dn = splice_dentry(dn, in, &have_lease);
+ dn = splice_dentry(dn, in, &have_lease, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
dout(" linking snapped dir %p to dn %p\n", in, dn);
- dn = splice_dentry(dn, in, NULL);
+ dn = splice_dentry(dn, in, NULL, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
err = PTR_ERR(in);
goto out;
}
- dn = splice_dentry(dn, in, NULL);
+ dn = splice_dentry(dn, in, NULL, false);
if (IS_ERR(dn))
dn = NULL;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b135178..fad95f8f260 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
num_fcntl_locks,
num_flock_locks);
unlock_kernel();
+ } else {
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
}
out_free:
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d36..46a368b6dce 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
#include "pagelist.h"
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+ struct page *page = list_entry(pl->head.prev, struct page,
+ lru);
+ kunmap(page);
+}
+
int ceph_pagelist_release(struct ceph_pagelist *pl)
{
if (pl->mapped_tail)
- kunmap(pl->mapped_tail);
+ ceph_pagelist_unmap_tail(pl);
+
while (!list_empty(&pl->head)) {
struct page *page = list_first_entry(&pl->head, struct page,
lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
pl->room += PAGE_SIZE;
list_add_tail(&page->lru, &pl->head);
if (pl->mapped_tail)
- kunmap(pl->mapped_tail);
+ ceph_pagelist_unmap_tail(pl);
pl->mapped_tail = kmap(page);
return 0;
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5..190b6c4a6f2 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
INIT_LIST_HEAD(&realm->children);
INIT_LIST_HEAD(&realm->child_item);
INIT_LIST_HEAD(&realm->empty_item);
+ INIT_LIST_HEAD(&realm->dirty_item);
INIT_LIST_HEAD(&realm->inodes_with_caps);
spin_lock_init(&realm->inodes_with_caps_lock);
__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
INIT_LIST_HEAD(&capsnap->ci_item);
INIT_LIST_HEAD(&capsnap->flushing_item);
- capsnap->follows = snapc->seq - 1;
+ capsnap->follows = snapc->seq;
capsnap->issued = __ceph_caps_issued(ci, NULL);
capsnap->dirty = dirty;
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm;
int invalidate = 0;
int err = -ENOMEM;
+ LIST_HEAD(dirty_realms);
dout("update_snap_trace deletion=%d\n", deletion);
more:
@@ -626,24 +628,6 @@ more:
}
}
- if (le64_to_cpu(ri->seq) > realm->seq) {
- dout("update_snap_trace updating %llx %p %lld -> %lld\n",
- realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
- /*
- * if the realm seq has changed, queue a cap_snap for every
- * inode with open caps. we do this _before_ we update
- * the realm info so that we prepare for writeback under the
- * _previous_ snap context.
- *
- * ...unless it's a snap deletion!
- */
- if (!deletion)
- queue_realm_cap_snaps(realm);
- } else {
- dout("update_snap_trace %llx %p seq %lld unchanged\n",
- realm->ino, realm, realm->seq);
- }
-
/* ensure the parent is correct */
err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
if (err < 0)
@@ -651,6 +635,8 @@ more:
invalidate += err;
if (le64_to_cpu(ri->seq) > realm->seq) {
+ dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+ realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
/* update realm parameters, snap lists */
realm->seq = le64_to_cpu(ri->seq);
realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
if (err < 0)
goto fail;
+ /* queue realm for cap_snap creation */
+ list_add(&realm->dirty_item, &dirty_realms);
+
invalidate = 1;
} else if (!realm->cached_context) {
+ dout("update_snap_trace %llx %p seq %lld new\n",
+ realm->ino, realm, realm->seq);
invalidate = 1;
+ } else {
+ dout("update_snap_trace %llx %p seq %lld unchanged\n",
+ realm->ino, realm, realm->seq);
}
dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
if (invalidate)
rebuild_snap_realms(realm);
+ /*
+ * queue cap snaps _after_ we've built the new snap contexts,
+ * so that i_head_snapc can be set appropriately.
+ */
+ list_for_each_entry(realm, &dirty_realms, dirty_item) {
+ queue_realm_cap_snaps(realm);
+ }
+
__cleanup_empty_realms(mdsc);
return 0;
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
igrab(inode);
spin_unlock(&mdsc->snap_flush_lock);
spin_lock(&inode->i_lock);
- __ceph_flush_snaps(ci, &session);
+ __ceph_flush_snaps(ci, &session, 0);
spin_unlock(&inode->i_lock);
iput(inode);
spin_lock(&mdsc->snap_flush_lock);
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
};
struct inode *inode = ceph_find_inode(sb, vino);
struct ceph_inode_info *ci;
+ struct ceph_snap_realm *oldrealm;
if (!inode)
continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
dout(" will move %p to split realm %llx %p\n",
inode, realm->ino, realm);
/*
- * Remove the inode from the realm's inode
- * list, but don't add it to the new realm
- * yet. We don't want the cap_snap to be
- * queued (again) by ceph_update_snap_trace()
- * below. Queue it _now_, under the old context.
+ * Move the inode to the new realm
*/
spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
+ list_add(&ci->i_snap_realm_item,
+ &realm->inodes_with_caps);
+ oldrealm = ci->i_snap_realm;
+ ci->i_snap_realm = realm;
spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&inode->i_lock);
- ceph_queue_cap_snap(ci);
+ ceph_get_snap_realm(mdsc, realm);
+ ceph_put_snap_realm(mdsc, oldrealm);
iput(inode);
continue;
@@ -880,43 +884,9 @@ skip_inode:
ceph_update_snap_trace(mdsc, p, e,
op == CEPH_SNAP_OP_DESTROY);
- if (op == CEPH_SNAP_OP_SPLIT) {
- /*
- * ok, _now_ add the inodes into the new realm.
- */
- for (i = 0; i < num_split_inos; i++) {
- struct ceph_vino vino = {
- .ino = le64_to_cpu(split_inos[i]),
- .snap = CEPH_NOSNAP,
- };
- struct inode *inode = ceph_find_inode(sb, vino);
- struct ceph_inode_info *ci;
-
- if (!inode)
- continue;
- ci = ceph_inode(inode);
- spin_lock(&inode->i_lock);
- if (list_empty(&ci->i_snap_realm_item)) {
- struct ceph_snap_realm *oldrealm =
- ci->i_snap_realm;
-
- dout(" moving %p to split realm %llx %p\n",
- inode, realm->ino, realm);
- spin_lock(&realm->inodes_with_caps_lock);
- list_add(&ci->i_snap_realm_item,
- &realm->inodes_with_caps);
- ci->i_snap_realm = realm;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_get_snap_realm(mdsc, realm);
- ceph_put_snap_realm(mdsc, oldrealm);
- }
- spin_unlock(&inode->i_lock);
- iput(inode);
- }
-
+ if (op == CEPH_SNAP_OP_SPLIT)
/* we took a reference when we created the realm, above */
ceph_put_snap_realm(mdsc, realm);
- }
__cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae572..b87638e84c4 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
struct list_head empty_item; /* if i have ref==0 */
+ struct list_head dirty_item; /* if realm needs new context */
+
/* the current set of snaps for this realm */
struct ceph_snap_context *cached_context;
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
- struct ceph_mds_session **psession);
+ struct ceph_mds_session **psession,
+ int again);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123..143d393881c 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
#endif
/* permit direct mmap, for read, write or exec */
BDI_CAP_MAP_DIRECT |
- BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+ BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
+ /* no writeback happens */
+ BDI_CAP_NO_ACCT_AND_WRITEBACK),
};
static struct kobj_map *cdev_map;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0da1debd499..917b7d449bb 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,8 +2,6 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
- select CRYPTO_MD5
- select CRYPTO_ARC4
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 21f0fbd8698..cfd1ce34e0b 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
if (compare_oid(oid, oidlen, MSKRB5_OID,
MSKRB5_OID_LEN))
server->sec_mskerberos = true;
- if (compare_oid(oid, oidlen, KRB5U2U_OID,
+ else if (compare_oid(oid, oidlen, KRB5U2U_OID,
KRB5U2U_OID_LEN))
server->sec_kerberosu2u = true;
- if (compare_oid(oid, oidlen, KRB5_OID,
+ else if (compare_oid(oid, oidlen, KRB5_OID,
KRB5_OID_LEN))
server->sec_kerberos = true;
- if (compare_oid(oid, oidlen, NTLMSSP_OID,
+ else if (compare_oid(oid, oidlen, NTLMSSP_OID,
NTLMSSP_OID_LEN))
server->sec_ntlmssp = true;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 709f2296bdb..35042d8f733 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -27,7 +27,6 @@
#include "md5.h"
#include "cifs_unicode.h"
#include "cifsproto.h"
-#include "ntlmssp.h"
#include <linux/ctype.h>
#include <linux/random.h>
@@ -43,43 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
- struct TCP_Server_Info *server, char *signature)
+ const struct mac_key *key, char *signature)
{
- int rc;
+ struct MD5Context context;
- if (cifs_pdu == NULL || server == NULL || signature == NULL)
+ if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
return -EINVAL;
- if (!server->ntlmssp.sdescmd5) {
- cERROR(1,
- "cifs_calculate_signature: can't generate signature\n");
- return -1;
- }
-
- rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
- if (rc) {
- cERROR(1, "cifs_calculate_signature: oould not init md5\n");
- return rc;
- }
-
- if (server->secType == RawNTLMSSP)
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- server->session_key.data.ntlmv2.key,
- CIFS_NTLMV2_SESSKEY_SIZE);
- else
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- (char *)&server->session_key.data,
- server->session_key.len);
-
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+ cifs_MD5_init(&context);
+ cifs_MD5_update(&context, (char *)&key->data, key->len);
+ cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
- rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
-
- return rc;
+ cifs_MD5_final(signature, &context);
+ return 0;
}
-
int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -101,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
+ rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key,
+ smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -111,39 +89,21 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
}
static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
- struct TCP_Server_Info *server, char *signature)
+ const struct mac_key *key, char *signature)
{
+ struct MD5Context context;
int i;
- int rc;
- if (iov == NULL || server == NULL || signature == NULL)
+ if ((iov == NULL) || (signature == NULL) || (key == NULL))
return -EINVAL;
- if (!server->ntlmssp.sdescmd5) {
- cERROR(1, "cifs_calc_signature2: can't generate signature\n");
- return -1;
- }
-
- rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
- if (rc) {
- cERROR(1, "cifs_calc_signature2: oould not init md5\n");
- return rc;
- }
-
- if (server->secType == RawNTLMSSP)
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- server->session_key.data.ntlmv2.key,
- CIFS_NTLMV2_SESSKEY_SIZE);
- else
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- (char *)&server->session_key.data,
- server->session_key.len);
-
+ cifs_MD5_init(&context);
+ cifs_MD5_update(&context, (char *)&key->data, key->len);
for (i = 0; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
if (iov[i].iov_base == NULL) {
- cERROR(1, "cifs_calc_signature2: null iovec entry");
+ cERROR(1, "null iovec entry");
return -EIO;
}
/* The first entry includes a length field (which does not get
@@ -151,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
if (i == 0) {
if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
break; /* nothing to sign or corrupt header */
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- iov[i].iov_base + 4, iov[i].iov_len - 4);
+ cifs_MD5_update(&context, iov[0].iov_base+4,
+ iov[0].iov_len-4);
} else
- crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
- iov[i].iov_base, iov[i].iov_len);
+ cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
}
- rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+ cifs_MD5_final(signature, &context);
- return rc;
+ return 0;
}
+
int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -185,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
+ rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key,
+ smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -195,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
}
int cifs_verify_signature(struct smb_hdr *cifs_pdu,
- struct TCP_Server_Info *server,
+ const struct mac_key *mac_key,
__u32 expected_sequence_number)
{
- int rc;
+ unsigned int rc;
char server_response_sig[8];
char what_we_think_sig_should_be[20];
- if (cifs_pdu == NULL || server == NULL)
+ if ((cifs_pdu == NULL) || (mac_key == NULL))
return -EINVAL;
if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -231,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
cpu_to_le32(expected_sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
- rc = cifs_calculate_signature(cifs_pdu, server,
+ rc = cifs_calculate_signature(cifs_pdu, mac_key,
what_we_think_sig_should_be);
if (rc)
@@ -248,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
}
/* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
+int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
const char *password)
{
char temp_key[16];
@@ -306,52 +267,38 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
{
int rc = 0;
int len;
- char nt_hash[CIFS_NTHASH_SIZE];
+ char nt_hash[16];
+ struct HMACMD5Context *pctxt;
wchar_t *user;
wchar_t *domain;
- wchar_t *server;
- if (!ses->server->ntlmssp.sdeschmacmd5) {
- cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
- return -1;
- }
+ pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
+
+ if (pctxt == NULL)
+ return -ENOMEM;
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash);
- crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
- CIFS_NTHASH_SIZE);
-
- rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
- if (rc) {
- cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
- return rc;
- }
+ /* convert Domainname to unicode and uppercase */
+ hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
/* convert ses->userName to unicode and uppercase */
len = strlen(ses->userName);
user = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (user == NULL) {
- cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
- rc = -ENOMEM;
+ if (user == NULL)
goto calc_exit_2;
- }
len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
UniStrupr(user);
-
- crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
- (char *)user, 2 * len);
+ hmac_md5_update((char *)user, 2*len, pctxt);
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
len = strlen(ses->domainName);
domain = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (domain == NULL) {
- cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
- rc = -ENOMEM;
+ if (domain == NULL)
goto calc_exit_1;
- }
len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
nls_cp);
/* the following line was removed since it didn't work well
@@ -359,292 +306,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
Maybe converting the domain name earlier makes sense */
/* UniStrupr(domain); */
- crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
- (char *)domain, 2 * len);
+ hmac_md5_update((char *)domain, 2*len, pctxt);
kfree(domain);
- } else if (ses->serverName) {
- len = strlen(ses->serverName);
-
- server = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (server == NULL) {
- cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
- rc = -ENOMEM;
- goto calc_exit_1;
- }
- len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
- nls_cp);
- /* the following line was removed since it didn't work well
- with lower cased domain name that passed as an option.
- Maybe converting the domain name earlier makes sense */
- /* UniStrupr(domain); */
-
- crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
- (char *)server, 2 * len);
-
- kfree(server);
}
-
- rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
- ses->server->ntlmv2_hash);
-
calc_exit_1:
kfree(user);
calc_exit_2:
/* BB FIXME what about bytes 24 through 40 of the signing key?
compare with the NTLM example */
+ hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
+ kfree(pctxt);
return rc;
}
-static int
-find_domain_name(struct cifsSesInfo *ses)
-{
- int rc = 0;
- unsigned int attrsize;
- unsigned int type;
- unsigned char *blobptr;
- struct ntlmssp2_name *attrptr;
-
- if (ses->server->tiblob) {
- blobptr = ses->server->tiblob;
- attrptr = (struct ntlmssp2_name *) blobptr;
-
- while ((type = attrptr->type) != 0) {
- blobptr += 2; /* advance attr type */
- attrsize = attrptr->length;
- blobptr += 2; /* advance attr size */
- if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
- if (!ses->domainName) {
- ses->domainName =
- kmalloc(attrptr->length + 1,
- GFP_KERNEL);
- if (!ses->domainName)
- return -ENOMEM;
- cifs_from_ucs2(ses->domainName,
- (__le16 *)blobptr,
- attrptr->length,
- attrptr->length,
- load_nls_default(), false);
- }
- }
- blobptr += attrsize; /* advance attr value */
- attrptr = (struct ntlmssp2_name *) blobptr;
- }
- } else {
- ses->server->tilen = 2 * sizeof(struct ntlmssp2_name);
- ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL);
- if (!ses->server->tiblob) {
- ses->server->tilen = 0;
- cERROR(1, "Challenge target info allocation failure");
- return -ENOMEM;
- }
- memset(ses->server->tiblob, 0x0, ses->server->tilen);
- attrptr = (struct ntlmssp2_name *) ses->server->tiblob;
- attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
- }
-
- return rc;
-}
-
-static int
-CalcNTLMv2_response(const struct TCP_Server_Info *server,
- char *v2_session_response)
-{
- int rc;
-
- if (!server->ntlmssp.sdeschmacmd5) {
- cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
- return -1;
- }
-
- crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
- CIFS_HMAC_MD5_HASH_SIZE);
-
- rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash);
- if (rc) {
- cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
- return rc;
- }
-
- memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
- server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
- crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
- v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
- sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
-
- if (server->tilen)
- crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
- server->tiblob, server->tilen);
-
- rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash,
- v2_session_response);
-
- return rc;
-}
-
-int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
+void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
const struct nls_table *nls_cp)
{
- int rc = 0;
+ int rc;
struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
+ struct HMACMD5Context context;
buf->blob_signature = cpu_to_le32(0x00000101);
buf->reserved = 0;
buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
buf->reserved2 = 0;
-
- if (!ses->domainName) {
- rc = find_domain_name(ses);
- if (rc) {
- cERROR(1, "could not get domain/server name rc %d", rc);
- return rc;
- }
- }
+ buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
+ buf->names[0].length = 0;
+ buf->names[1].type = 0;
+ buf->names[1].length = 0;
/* calculate buf->ntlmv2_hash */
rc = calc_ntlmv2_hash(ses, nls_cp);
- if (rc) {
- cERROR(1, "could not get v2 hash rc %d", rc);
- return rc;
- }
- rc = CalcNTLMv2_response(ses->server, resp_buf);
- if (rc) {
+ if (rc)
cERROR(1, "could not get v2 hash rc %d", rc);
- return rc;
- }
-
- if (!ses->server->ntlmssp.sdeschmacmd5) {
- cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
- return -1;
- }
-
- crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
- ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ CalcNTLMv2_response(ses, resp_buf);
- rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
- if (rc) {
- cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n");
- return rc;
- }
+ /* now calculate the MAC key for NTLMv2 */
+ hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
+ hmac_md5_update(resp_buf, 16, &context);
+ hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context);
- crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
- resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
-
- rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
- ses->server->session_key.data.ntlmv2.key);
-
- memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
- sizeof(struct ntlmv2_resp));
- ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp);
-
- return rc;
+ memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf,
+ sizeof(struct ntlmv2_resp));
+ ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp);
}
-int
-calc_seckey(struct TCP_Server_Info *server)
-{
- int rc;
- unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE];
- struct crypto_blkcipher *tfm_arc4;
- struct scatterlist sgin, sgout;
- struct blkcipher_desc desc;
-
- get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
- tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)",
- 0, CRYPTO_ALG_ASYNC);
- if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
- cERROR(1, "could not allocate " "master crypto API arc4\n");
- return 1;
- }
-
- desc.tfm = tfm_arc4;
-
- crypto_blkcipher_setkey(tfm_arc4,
- server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
- sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
- sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
- rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
-
- if (!rc)
- memcpy(server->session_key.data.ntlmv2.key,
- sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
- crypto_free_blkcipher(tfm_arc4);
-
- return 0;
-}
-
-void
-cifs_crypto_shash_release(struct TCP_Server_Info *server)
-{
- if (server->ntlmssp.md5)
- crypto_free_shash(server->ntlmssp.md5);
-
- if (server->ntlmssp.hmacmd5)
- crypto_free_shash(server->ntlmssp.hmacmd5);
-
- kfree(server->ntlmssp.sdeschmacmd5);
-
- kfree(server->ntlmssp.sdescmd5);
-}
-
-int
-cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+void CalcNTLMv2_response(const struct cifsSesInfo *ses,
+ char *v2_session_response)
{
- int rc;
- unsigned int size;
-
- server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
- if (!server->ntlmssp.hmacmd5 ||
- IS_ERR(server->ntlmssp.hmacmd5)) {
- cERROR(1, "could not allocate crypto hmacmd5\n");
- return 1;
- }
-
- server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
- if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
- cERROR(1, "could not allocate crypto md5\n");
- rc = 1;
- goto cifs_crypto_shash_allocate_ret1;
- }
-
- size = sizeof(struct shash_desc) +
- crypto_shash_descsize(server->ntlmssp.hmacmd5);
- server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
- if (!server->ntlmssp.sdeschmacmd5) {
- cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
- rc = -ENOMEM;
- goto cifs_crypto_shash_allocate_ret2;
- }
- server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5;
- server->ntlmssp.sdeschmacmd5->shash.flags = 0x0;
+ struct HMACMD5Context context;
+ /* rest of v2 struct already generated */
+ memcpy(v2_session_response + 8, ses->server->cryptKey, 8);
+ hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
+ hmac_md5_update(v2_session_response+8,
+ sizeof(struct ntlmv2_resp) - 8, &context);
- size = sizeof(struct shash_desc) +
- crypto_shash_descsize(server->ntlmssp.md5);
- server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL);
- if (!server->ntlmssp.sdescmd5) {
- cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
- rc = -ENOMEM;
- goto cifs_crypto_shash_allocate_ret3;
- }
- server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5;
- server->ntlmssp.sdescmd5->shash.flags = 0x0;
-
- return 0;
-
-cifs_crypto_shash_allocate_ret3:
- kfree(server->ntlmssp.sdeschmacmd5);
-
-cifs_crypto_shash_allocate_ret2:
- crypto_free_shash(server->ntlmssp.md5);
-
-cifs_crypto_shash_allocate_ret1:
- crypto_free_shash(server->ntlmssp.hmacmd5);
-
- return rc;
+ hmac_md5_final(v2_session_response, &context);
+/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c9d0cfc086e..0cdfb8c32ac 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,9 +25,6 @@
#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
-#include <crypto/internal/hash.h>
-#include <linux/scatterlist.h>
-
/*
* The sizes of various internal tables and strings
*/
@@ -100,7 +97,7 @@ enum protocolEnum {
/* Netbios frames protocol not supported at this time */
};
-struct session_key {
+struct mac_key {
unsigned int len;
union {
char ntlm[CIFS_SESS_KEY_SIZE + 16];
@@ -123,21 +120,6 @@ struct cifs_cred {
struct cifs_ace *aces;
};
-struct sdesc {
- struct shash_desc shash;
- char ctx[];
-};
-
-struct ntlmssp_auth {
- __u32 client_flags;
- __u32 server_flags;
- unsigned char ciphertext[CIFS_CPHTXT_SIZE];
- struct crypto_shash *hmacmd5;
- struct crypto_shash *md5;
- struct sdesc *sdeschmacmd5;
- struct sdesc *sdescmd5;
-};
-
/*
*****************************************************************
* Except the CIFS PDUs themselves all the
@@ -200,14 +182,11 @@ struct TCP_Server_Info {
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* needed for CIFS PDU signature */
- struct session_key session_key;
+ struct mac_key mac_signing_key;
char ntlmv2_hash[16];
unsigned long lstrp; /* when we got last response from this server */
u16 dialect; /* dialect index that server chose */
/* extended security flavors that server supports */
- unsigned int tilen; /* length of the target info blob */
- unsigned char *tiblob; /* target info blob in challenge response */
- struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool sec_kerberosu2u; /* supports U2U Kerberos */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 320e0fd0ba7..14d036d8db1 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -134,12 +134,6 @@
* Size of the session key (crypto key encrypted with the password
*/
#define CIFS_SESS_KEY_SIZE (24)
-#define CIFS_CLIENT_CHALLENGE_SIZE (8)
-#define CIFS_SERVER_CHALLENGE_SIZE (8)
-#define CIFS_HMAC_MD5_HASH_SIZE (16)
-#define CIFS_CPHTXT_SIZE (16)
-#define CIFS_NTLMV2_SESSKEY_SIZE (16)
-#define CIFS_NTHASH_SIZE (16)
/*
* Maximum user name length
@@ -669,6 +663,7 @@ struct ntlmv2_resp {
__le64 time;
__u64 client_chal; /* random */
__u32 reserved2;
+ struct ntlmssp2_name names[2];
/* array of name entries could follow ending in minimum 4 byte struct */
} __attribute__((packed));
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1378d913384..1d60c655e3e 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
+extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
- unsigned short int port);
+ const unsigned short int port);
extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
extern void header_assemble(struct smb_hdr *, char /* command */ ,
const struct cifsTconInfo *, int /* length of
@@ -361,15 +362,13 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
__u32 *);
extern int cifs_verify_signature(struct smb_hdr *,
- struct TCP_Server_Info *server,
+ const struct mac_key *mac_key,
__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
+extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
const char *pass);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
+extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
const struct nls_table *);
-extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
-extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
-extern int calc_seckey(struct TCP_Server_Info *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern void calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4bda920d1f7..7e83b356cc9 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -232,7 +232,7 @@ static int
small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
void **request_buf)
{
- int rc = 0;
+ int rc;
rc = cifs_reconnect_tcon(tcon, smb_command);
if (rc)
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
if (tcon != NULL)
cifs_stats_inc(&tcon->num_smbs_sent);
- return rc;
+ return 0;
}
int
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct,
/* If the return code is zero, this function must fill in request_buf pointer */
static int
-smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
- void **request_buf /* returned */ ,
- void **response_buf /* returned */ )
+__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+ void **request_buf, void **response_buf)
{
- int rc = 0;
-
- rc = cifs_reconnect_tcon(tcon, smb_command);
- if (rc)
- return rc;
-
*request_buf = cifs_buf_get();
if (*request_buf == NULL) {
/* BB should we add a retry in here if not a writepage? */
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
if (tcon != NULL)
cifs_stats_inc(&tcon->num_smbs_sent);
- return rc;
+ return 0;
+}
+
+/* If the return code is zero, this function must fill in request_buf pointer */
+static int
+smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+ void **request_buf, void **response_buf)
+{
+ int rc;
+
+ rc = cifs_reconnect_tcon(tcon, smb_command);
+ if (rc)
+ return rc;
+
+ return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
+}
+
+static int
+smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
+ void **request_buf, void **response_buf)
+{
+ if (tcon->ses->need_reconnect || tcon->need_reconnect)
+ return -EHOSTDOWN;
+
+ return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
}
static int validate_t2(struct smb_t2_rsp *pSMB)
@@ -604,14 +621,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
else
rc = -EINVAL;
- if (server->secType == Kerberos) {
- if (!server->sec_kerberos &&
- !server->sec_mskerberos)
- rc = -EOPNOTSUPP;
- } else if (server->secType == RawNTLMSSP) {
- if (!server->sec_ntlmssp)
- rc = -EOPNOTSUPP;
- } else
+ if (server->sec_kerberos || server->sec_mskerberos)
+ server->secType = Kerberos;
+ else if (server->sec_ntlmssp)
+ server->secType = RawNTLMSSP;
+ else
rc = -EOPNOTSUPP;
}
} else
@@ -4537,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
cFYI(1, "In QFSUnixInfo");
QFSUnixRetry:
- rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
- (void **) &pSMBr);
+ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+ (void **) &pSMB, (void **) &pSMBr);
if (rc)
return rc;
@@ -4607,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
cFYI(1, "In SETFSUnixInfo");
SETFSUnixRetry:
/* BB switch to small buf init to save memory */
- rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
- (void **) &pSMBr);
+ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+ (void **) &pSMB, (void **) &pSMBr);
if (rc)
return rc;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ec0ea4a43bd..88c84a38bcc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -400,7 +400,9 @@ incomplete_rcv:
cFYI(1, "call to reconnect done");
csocket = server->ssocket;
continue;
- } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
+ } else if (length == -ERESTARTSYS ||
+ length == -EAGAIN ||
+ length == -EINTR) {
msleep(1); /* minimum sleep to prevent looping
allowing socket to clear and app threads to set
tcpStatus CifsNeedReconnect if server hung */
@@ -414,18 +416,6 @@ incomplete_rcv:
} else
continue;
} else if (length <= 0) {
- if (server->tcpStatus == CifsNew) {
- cFYI(1, "tcp session abend after SMBnegprot");
- /* some servers kill the TCP session rather than
- returning an SMB negprot error, in which
- case reconnecting here is not going to help,
- and so simply return error to mount */
- break;
- }
- if (!try_to_freeze() && (length == -EINTR)) {
- cFYI(1, "cifsd thread killed");
- break;
- }
cFYI(1, "Reconnect after unexpected peek error %d",
length);
cifs_reconnect(server);
@@ -466,27 +456,19 @@ incomplete_rcv:
an error on SMB negprot response */
cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
pdu_length);
- if (server->tcpStatus == CifsNew) {
- /* if nack on negprot (rather than
- ret of smb negprot error) reconnecting
- not going to help, ret error to mount */
- break;
- } else {
- /* give server a second to
- clean up before reconnect attempt */
- msleep(1000);
- /* always try 445 first on reconnect
- since we get NACK on some if we ever
- connected to port 139 (the NACK is
- since we do not begin with RFC1001
- session initialize frame) */
- server->addr.sockAddr.sin_port =
- htons(CIFS_PORT);
- cifs_reconnect(server);
- csocket = server->ssocket;
- wake_up(&server->response_q);
- continue;
- }
+ /* give server a second to clean up */
+ msleep(1000);
+ /* always try 445 first on reconnect since we get NACK
+ * on some if we ever connected to port 139 (the NACK
+ * is since we do not begin with RFC1001 session
+ * initialize frame)
+ */
+ cifs_set_port((struct sockaddr *)
+ &server->addr.sockAddr, CIFS_PORT);
+ cifs_reconnect(server);
+ csocket = server->ssocket;
+ wake_up(&server->response_q);
+ continue;
} else if (temp != (char) 0) {
cERROR(1, "Unknown RFC 1002 frame");
cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
@@ -522,8 +504,7 @@ incomplete_rcv:
total_read += length) {
length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
pdu_length - total_read, 0);
- if ((server->tcpStatus == CifsExiting) ||
- (length == -EINTR)) {
+ if (server->tcpStatus == CifsExiting) {
/* then will exit */
reconnect = 2;
break;
@@ -534,8 +515,9 @@ incomplete_rcv:
/* Now we will reread sock */
reconnect = 1;
break;
- } else if ((length == -ERESTARTSYS) ||
- (length == -EAGAIN)) {
+ } else if (length == -ERESTARTSYS ||
+ length == -EAGAIN ||
+ length == -EINTR) {
msleep(1); /* minimum sleep to prevent looping,
allowing socket to clear and app
threads to set tcpStatus
@@ -1708,7 +1690,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
CIFSSMBLogoff(xid, ses);
_FreeXid(xid);
}
- cifs_crypto_shash_release(server);
sesInfoFree(ses);
cifs_put_tcp_session(server);
}
@@ -1725,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses) {
cFYI(1, "Existing smb sess found (status=%d)", ses->status);
- /* existing SMB ses has a server reference already */
- cifs_put_tcp_session(server);
-
mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(xid, ses);
if (rc) {
@@ -1750,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
}
}
mutex_unlock(&ses->session_mutex);
+
+ /* existing SMB ses has a server reference already */
+ cifs_put_tcp_session(server);
FreeXid(xid);
return ses;
}
@@ -1788,23 +1769,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
ses->linux_uid = volume_info->linux_uid;
ses->overrideSecFlg = volume_info->secFlg;
- rc = cifs_crypto_shash_allocate(server);
- if (rc) {
- cERROR(1, "could not setup hash structures rc %d", rc);
- goto get_ses_fail;
- }
- server->tilen = 0;
- server->tiblob = NULL;
-
mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(xid, ses);
if (!rc)
rc = cifs_setup_session(xid, ses, volume_info->local_nls);
mutex_unlock(&ses->session_mutex);
- if (rc) {
- cifs_crypto_shash_release(ses->server);
+ if (rc)
goto get_ses_fail;
- }
/* success, put it on the list */
write_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 86a164f08a7..53cce8cc222 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -801,6 +801,8 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
+ if (S_ISREG(inode->i_mode))
+ inode->i_data.backing_dev_info = sb->s_bdi;
#ifdef CONFIG_CIFS_FSCACHE
/* initialize per-inode cache cookie pointer */
CIFS_I(inode)->fscache = NULL;
@@ -1462,29 +1464,18 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
{
char *fromName = NULL;
char *toName = NULL;
- struct cifs_sb_info *cifs_sb_source;
- struct cifs_sb_info *cifs_sb_target;
+ struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
FILE_UNIX_BASIC_INFO *info_buf_target;
int xid, rc, tmprc;
- cifs_sb_target = CIFS_SB(target_dir->i_sb);
- cifs_sb_source = CIFS_SB(source_dir->i_sb);
- tcon = cifs_sb_source->tcon;
+ cifs_sb = CIFS_SB(source_dir->i_sb);
+ tcon = cifs_sb->tcon;
xid = GetXid();
/*
- * BB: this might be allowed if same server, but different share.
- * Consider adding support for this
- */
- if (tcon != cifs_sb_target->tcon) {
- rc = -EXDEV;
- goto cifs_rename_exit;
- }
-
- /*
* we already have the rename sem so we do not need to
* grab it again here to protect the path integrity
*/
@@ -1519,17 +1510,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
info_buf_target = info_buf_source + 1;
tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
info_buf_source,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags &
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
if (tmprc != 0)
goto unlink_target;
- tmprc = CIFSSMBUnixQPathInfo(xid, tcon,
- toName, info_buf_target,
- cifs_sb_target->local_nls,
- /* remap based on source sb */
- cifs_sb_source->mnt_cifs_flags &
+ tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
+ info_buf_target,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
if (tmprc == 0 && (info_buf_source->UniqueId ==
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f97851119e6..9aad47a2d62 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
}
int
-cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
- const unsigned short int port)
+cifs_set_port(struct sockaddr *addr, const unsigned short int port)
{
- if (!cifs_convert_address(dst, src, len))
- return 0;
-
- switch (dst->sa_family) {
+ switch (addr->sa_family) {
case AF_INET:
- ((struct sockaddr_in *)dst)->sin_port = htons(port);
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
break;
case AF_INET6:
- ((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
break;
default:
return 0;
}
-
return 1;
}
+int
+cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+ const unsigned short int port)
+{
+ if (!cifs_convert_address(dst, src, len))
+ return 0;
+ return cifs_set_port(dst, port);
+}
+
/*****************************************************************************
convert a NT status code to a dos class/code
*****************************************************************************/
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 1db0f0746a5..49c9a4e7531 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -61,19 +61,6 @@
#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
#define NTLMSSP_NEGOTIATE_56 0x80000000
-/* Define AV Pair Field IDs */
-#define NTLMSSP_AV_EOL 0
-#define NTLMSSP_AV_NB_COMPUTER_NAME 1
-#define NTLMSSP_AV_NB_DOMAIN_NAME 2
-#define NTLMSSP_AV_DNS_COMPUTER_NAME 3
-#define NTLMSSP_AV_DNS_DOMAIN_NAME 4
-#define NTLMSSP_AV_DNS_TREE_NAME 5
-#define NTLMSSP_AV_FLAGS 6
-#define NTLMSSP_AV_TIMESTAMP 7
-#define NTLMSSP_AV_RESTRICTION 8
-#define NTLMSSP_AV_TARGET_NAME 9
-#define NTLMSSP_AV_CHANNEL_BINDINGS 10
-
/* Although typedefs are not commonly used for structure definitions */
/* in the Linux kernel, in this particular case they are useful */
/* to more closely match the standards document for NTLMSSP from */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 795095f4eac..0a57cb7db5d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
struct cifsSesInfo *ses)
{
- unsigned int tioffset; /* challeng message target info area */
- unsigned int tilen; /* challeng message target info area length */
-
CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
@@ -408,20 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
- ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags);
-
- tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
- tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
- ses->server->tilen = tilen;
- if (tilen) {
- ses->server->tiblob = kmalloc(tilen, GFP_KERNEL);
- if (!ses->server->tiblob) {
- cERROR(1, "Challenge target info allocation failure");
- return -ENOMEM;
- }
- memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen);
- }
-
return 0;
}
@@ -442,13 +425,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
/* BB is NTLMV2 session security format easier to use here? */
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM;
+ NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
if (ses->server->secMode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
- flags |= NTLMSSP_NEGOTIATE_SIGN |
- NTLMSSP_NEGOTIATE_KEY_XCH |
- NTLMSSP_NEGOTIATE_EXTENDED_SEC;
- }
+ (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ flags |= NTLMSSP_NEGOTIATE_SIGN;
+ if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
+ flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
sec_blob->NegotiateFlags |= cpu_to_le32(flags);
@@ -469,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
struct cifsSesInfo *ses,
const struct nls_table *nls_cp, bool first)
{
- int rc;
- unsigned int size;
AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
__u32 flags;
unsigned char *tmp;
- struct ntlmv2_resp ntlmv2_response = {};
+ char ntlm_session_key[CIFS_SESS_KEY_SIZE];
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.Length = 0;
sec_blob->LmChallengeResponse.MaximumLength = 0;
- sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
- if (rc) {
- cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc);
- goto setup_ntlmv2_ret;
- }
- size = sizeof(struct ntlmv2_resp);
- memcpy(tmp, (char *)&ntlmv2_response, size);
- tmp += size;
- if (ses->server->tilen > 0) {
- memcpy(tmp, ses->server->tiblob, ses->server->tilen);
- tmp += ses->server->tilen;
- } else
- ses->server->tilen = 0;
+ /* calculate session key, BB what about adding similar ntlmv2 path? */
+ SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
+ if (first)
+ cifs_calculate_mac_key(&ses->server->mac_signing_key,
+ ntlm_session_key, ses->password);
- sec_blob->NtChallengeResponse.Length = cpu_to_le16(size +
- ses->server->tilen);
+ memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
+ sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(size + ses->server->tilen);
+ cpu_to_le16(CIFS_SESS_KEY_SIZE);
+
+ tmp += CIFS_SESS_KEY_SIZE;
if (ses->domainName == NULL) {
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -527,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
MAX_USERNAME_SIZE, nls_cp);
len *= 2; /* unicode is 2 bytes each */
+ len += 2; /* trailing null */
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
sec_blob->DomainName.Length = cpu_to_le16(len);
sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
@@ -543,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
MAX_USERNAME_SIZE, nls_cp);
len *= 2; /* unicode is 2 bytes each */
+ len += 2; /* trailing null */
sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
sec_blob->UserName.Length = cpu_to_le16(len);
sec_blob->UserName.MaximumLength = cpu_to_le16(len);
@@ -554,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
- if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
- !calc_seckey(ses->server)) {
- memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
- sec_blob->SessionKey.MaximumLength =
- cpu_to_le16(CIFS_CPHTXT_SIZE);
- tmp += CIFS_CPHTXT_SIZE;
- } else {
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->SessionKey.Length = 0;
- sec_blob->SessionKey.MaximumLength = 0;
- }
-
- ses->server->sequence_number = 0;
-
-setup_ntlmv2_ret:
- if (ses->server->tilen > 0)
- kfree(ses->server->tiblob);
-
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = 0;
+ sec_blob->SessionKey.MaximumLength = 0;
return tmp - pbuffer;
}
@@ -587,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
return;
}
-static int setup_ntlmssp_auth_req(char *ntlmsspblob,
+static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
struct cifsSesInfo *ses,
const struct nls_table *nls, bool first_time)
{
int bloblen;
- bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls,
+ bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
first_time);
+ pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
return bloblen;
}
@@ -730,7 +690,7 @@ ssetup_ntlmssp_authenticate:
if (first_time) /* should this be moved into common code
with similar ntlmv2 path? */
- cifs_calculate_session_key(&ses->server->session_key,
+ cifs_calculate_mac_key(&ses->server->mac_signing_key,
ntlm_session_key, ses->password);
/* copy session key */
@@ -769,21 +729,12 @@ ssetup_ntlmssp_authenticate:
cpu_to_le16(sizeof(struct ntlmv2_resp));
/* calculate session key */
- rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (rc) {
- kfree(v2_sess_key);
- goto ssetup_exit;
- }
+ setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
/* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
kfree(v2_sess_key);
- if (ses->server->tilen > 0) {
- memcpy(bcc_ptr, ses->server->tiblob,
- ses->server->tilen);
- bcc_ptr += ses->server->tilen;
- }
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {
*bcc_ptr = 0;
@@ -814,15 +765,15 @@ ssetup_ntlmssp_authenticate:
}
/* bail out if key is too long */
if (msg->sesskey_len >
- sizeof(ses->server->session_key.data.krb5)) {
+ sizeof(ses->server->mac_signing_key.data.krb5)) {
cERROR(1, "Kerberos signing key too long (%u bytes)",
msg->sesskey_len);
rc = -EOVERFLOW;
goto ssetup_exit;
}
if (first_time) {
- ses->server->session_key.len = msg->sesskey_len;
- memcpy(ses->server->session_key.data.krb5,
+ ses->server->mac_signing_key.len = msg->sesskey_len;
+ memcpy(ses->server->mac_signing_key.data.krb5,
msg->data, msg->sesskey_len);
}
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -864,28 +815,12 @@ ssetup_ntlmssp_authenticate:
if (phase == NtLmNegotiate) {
setup_ntlmssp_neg_req(pSMB, ses);
iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
- iov[1].iov_base = &pSMB->req.SecurityBlob[0];
} else if (phase == NtLmAuthenticate) {
int blob_len;
- char *ntlmsspblob;
-
- ntlmsspblob = kmalloc(5 *
- sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- cERROR(1, "Can't allocate NTLMSSP");
- rc = -ENOMEM;
- goto ssetup_exit;
- }
-
- blob_len = setup_ntlmssp_auth_req(ntlmsspblob,
- ses,
- nls_cp,
- first_time);
+ blob_len = setup_ntlmssp_auth_req(pSMB, ses,
+ nls_cp,
+ first_time);
iov[1].iov_len = blob_len;
- iov[1].iov_base = ntlmsspblob;
- pSMB->req.SecurityBlobLength =
- cpu_to_le16(blob_len);
/* Make sure that we tell the server that we
are using the uid that it just gave us back
on the response (challenge) */
@@ -895,6 +830,7 @@ ssetup_ntlmssp_authenticate:
rc = -ENOSYS;
goto ssetup_exit;
}
+ iov[1].iov_base = &pSMB->req.SecurityBlob[0];
/* unicode strings must be word aligned */
if ((iov[0].iov_len + iov[1].iov_len) % 2) {
*bcc_ptr = 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cdf4cc..82f78c4d697 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(midQ->resp_buf,
- ses->server,
+ &ses->server->mac_signing_key,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- ses->server,
+ &ses->server->mac_signing_key,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- ses->server,
+ &ses->server->mac_signing_key,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index de89645777c..116af7546cf 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
}
/* adjust outsize. is this useful ?? */
- req->uc_outSize = nbytes;
- req->uc_flags |= REQ_WRITE;
+ req->uc_outSize = nbytes;
+ req->uc_flags |= CODA_REQ_WRITE;
count = nbytes;
/* Convert filedescriptor into a file handle */
diff --git a/fs/compat.c b/fs/compat.c
index 718c7062aec..0644a154672 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
{
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov;
+ struct iovec *iov = iovstack;
ssize_t ret;
io_fn_t fn;
iov_fn_t fnv;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 51f270b479b..48d74c7391d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
int ret = 0;
if (dio->bio) {
- loff_t cur_offset = dio->block_in_file << dio->blkbits;
+ loff_t cur_offset = dio->cur_page_fs_offset;
loff_t bio_next_offset = dio->logical_offset_in_bio +
dio->bio->bi_size;
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
* Submit now if the underlying fs is about to perform a
* metadata read
*/
- if (dio->boundary)
+ else if (dio->boundary)
dio_bio_submit(dio);
}
diff --git a/fs/exec.c b/fs/exec.c
index 2d945528274..828dd2461d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
argv++;
if (i++ >= max)
return -E2BIG;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
cond_resched();
}
}
@@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTNOHAND;
+ goto out;
+ }
+ cond_resched();
+
offset = pos % PAGE_SIZE;
if (offset == 0)
offset = PAGE_SIZE;
@@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
#else
stack_top = arch_align_stack(stack_top);
stack_top = PAGE_ALIGN(stack_top);
+
+ if (unlikely(stack_top < mmap_min_addr) ||
+ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
+ return -ENOMEM;
+
stack_shift = vma->vm_end - stack_top;
bprm->p -= stack_shift;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6769fd0f35b..f8cc34f542c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync);
static int __init fcntl_init(void)
{
- /* please add new bits here to ensure allocation uniqueness */
- BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ /*
+ * Please add new bits here to ensure allocation uniqueness.
+ * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
+ * is defined as O_NONBLOCK on some platforms and not on others.
+ */
+ BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
- O_TRUNC | O_APPEND | O_NONBLOCK |
+ O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06ba184..ab38fef1c9a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>
-#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
-
/*
* We don't actually have pdflush, but this one is exported though /proc...
*/
@@ -71,6 +69,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return test_bit(BDI_writeback_running, &bdi->state);
}
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (strcmp(sb->s_type->name, "bdev") == 0)
+ return inode->i_mapping->backing_dev_info;
+
+ return sb->s_bdi;
+}
+
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
@@ -808,7 +816,7 @@ int bdi_writeback_thread(void *data)
wb->last_active = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&bdi->work_list)) {
+ if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
continue;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d367af1514e..cde755cca56 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
loff_t file_size;
unsigned int num;
unsigned int offset;
- size_t total_len;
+ size_t total_len = 0;
req = fuse_get_req(fc);
if (IS_ERR(req))
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cde1248a622..ac750bd31a6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -932,7 +932,7 @@ int gfs2_logd(void *data)
do {
prepare_to_wait(&sdp->sd_logd_waitq, &wait,
- TASK_UNINTERRUPTIBLE);
+ TASK_INTERRUPTIBLE);
if (!gfs2_ail_flush_reqd(sdp) &&
!gfs2_jrnl_flush_reqd(sdp) &&
!kthread_should_stop())
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d..f3f3578393a 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
inode_inc_link_count(dir);
- inode = minix_new_inode(dir, mode, &err);
+ inode = minix_new_inode(dir, S_IFDIR | mode, &err);
if (!inode)
goto out_dir;
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 6c2aad49d73..f7e13db613c 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,6 +63,7 @@ config NFS_V3_ACL
config NFS_V4
bool "NFS client support for NFS version 4"
depends on NFS_FS
+ select SUNRPC_GSS
help
This option enables support for version 4 of the NFS protocol
(RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4e7df2adb21..e7340729af8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
sin1->sin6_scope_id != sin2->sin6_scope_id)
return 0;
- return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
+ return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
}
#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eb51bd6201d..05bf3c0dc75 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
default:
BUG();
}
- if (res < 0)
- dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
- " - error %d!\n",
- __func__, res);
return res;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ec3966e4706..f4cbf0c306c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
goto out_err;
error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
+ if (unlikely(error == -ESTALE)) {
+ struct dentry *pd_dentry;
+ pd_dentry = dget_parent(dentry);
+ if (pd_dentry != NULL) {
+ nfs_zap_caches(pd_dentry->d_inode);
+ dput(pd_dentry);
+ }
+ }
nfs_free_fattr(res.fattr);
if (error < 0)
goto out_err;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 95932f523ae..4264377552e 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,6 +69,7 @@ config NFSD_V4
depends on NFSD && PROC_FS && EXPERIMENTAL
select NFSD_V3
select FS_POSIX_ACL
+ select SUNRPC_GSS
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa5cd3..391915093fe 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
}
inode->i_mode = new_mode;
+ inode->i_ctime = CURRENT_TIME;
di->i_mode = cpu_to_le16(inode->i_mode);
+ di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 41d5f1f92d5..52c7557f3e2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
static LIST_HEAD(o2hb_node_events);
static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
+/*
+ * In global heartbeat, we maintain a series of region bitmaps.
+ * - o2hb_region_bitmap allows us to limit the region number to max region.
+ * - o2hb_live_region_bitmap tracks live regions (seen steady iterations).
+ * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes
+ * heartbeat on it.
+ * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts.
+ */
+static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
+static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
+static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
+static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
+
+#define O2HB_DB_TYPE_LIVENODES 0
+#define O2HB_DB_TYPE_LIVEREGIONS 1
+#define O2HB_DB_TYPE_QUORUMREGIONS 2
+#define O2HB_DB_TYPE_FAILEDREGIONS 3
+#define O2HB_DB_TYPE_REGION_LIVENODES 4
+#define O2HB_DB_TYPE_REGION_NUMBER 5
+#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
+struct o2hb_debug_buf {
+ int db_type;
+ int db_size;
+ int db_len;
+ void *db_data;
+};
+
+static struct o2hb_debug_buf *o2hb_db_livenodes;
+static struct o2hb_debug_buf *o2hb_db_liveregions;
+static struct o2hb_debug_buf *o2hb_db_quorumregions;
+static struct o2hb_debug_buf *o2hb_db_failedregions;
+
#define O2HB_DEBUG_DIR "o2hb"
#define O2HB_DEBUG_LIVENODES "livenodes"
+#define O2HB_DEBUG_LIVEREGIONS "live_regions"
+#define O2HB_DEBUG_QUORUMREGIONS "quorum_regions"
+#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
+#define O2HB_DEBUG_REGION_NUMBER "num"
+#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
+static struct dentry *o2hb_debug_liveregions;
+static struct dentry *o2hb_debug_quorumregions;
+static struct dentry *o2hb_debug_failedregions;
static LIST_HEAD(o2hb_all_regions);
@@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
#define O2HB_DEFAULT_BLOCK_BITS 9
+enum o2hb_heartbeat_modes {
+ O2HB_HEARTBEAT_LOCAL = 0,
+ O2HB_HEARTBEAT_GLOBAL,
+ O2HB_HEARTBEAT_NUM_MODES,
+};
+
+char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
+ "local", /* O2HB_HEARTBEAT_LOCAL */
+ "global", /* O2HB_HEARTBEAT_GLOBAL */
+};
+
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
+unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
/* Only sets a new threshold if there are no active regions.
*
@@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold)
}
}
+static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode)
+{
+ int ret = -1;
+
+ if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) {
+ spin_lock(&o2hb_live_lock);
+ if (list_empty(&o2hb_all_regions)) {
+ o2hb_heartbeat_mode = hb_mode;
+ ret = 0;
+ }
+ spin_unlock(&o2hb_live_lock);
+ }
+
+ return ret;
+}
+
struct o2hb_node_event {
struct list_head hn_item;
enum o2hb_callback_type hn_event_type;
@@ -135,6 +204,18 @@ struct o2hb_region {
struct block_device *hr_bdev;
struct o2hb_disk_slot *hr_slots;
+ /* live node map of this region */
+ unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ unsigned int hr_region_num;
+
+ struct dentry *hr_debug_dir;
+ struct dentry *hr_debug_livenodes;
+ struct dentry *hr_debug_regnum;
+ struct dentry *hr_debug_elapsed_time;
+ struct o2hb_debug_buf *hr_db_livenodes;
+ struct o2hb_debug_buf *hr_db_regnum;
+ struct o2hb_debug_buf *hr_db_elapsed_time;
+
/* let the person setting up hb wait for it to return until it
* has reached a 'steady' state. This will be fixed when we have
* a more complete api that doesn't lead to this sort of fragility. */
@@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt {
int wc_error;
};
+static int o2hb_pop_count(void *map, int count)
+{
+ int i = -1, pop = 0;
+
+ while ((i = find_next_bit(map, count, i + 1)) < count)
+ pop++;
+ return pop;
+}
+
static void o2hb_write_timeout(struct work_struct *work)
{
+ int failed, quorum;
+ unsigned long flags;
struct o2hb_region *reg =
container_of(work, struct o2hb_region,
hr_write_timeout_work.work);
@@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work)
mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
"milliseconds\n", reg->hr_dev_name,
jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
+
+ if (o2hb_global_heartbeat_active()) {
+ spin_lock_irqsave(&o2hb_live_lock, flags);
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
+ failed = o2hb_pop_count(&o2hb_failed_region_bitmap,
+ O2NM_MAX_REGIONS);
+ quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS);
+ spin_unlock_irqrestore(&o2hb_live_lock, flags);
+
+ mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n",
+ quorum, failed);
+
+ /*
+ * Fence if the number of failed regions >= half the number
+ * of quorum regions
+ */
+ if ((failed << 1) < quorum)
+ return;
+ }
+
o2quo_disk_timeout();
}
@@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
O2HB_MAX_WRITE_TIMEOUT_MS);
+ if (o2hb_global_heartbeat_active()) {
+ spin_lock(&o2hb_live_lock);
+ clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
+ spin_unlock(&o2hb_live_lock);
+ }
cancel_delayed_work(&reg->hr_write_timeout_work);
reg->hr_last_timeout_start = jiffies;
schedule_delayed_work(&reg->hr_write_timeout_work,
@@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event,
{
assert_spin_locked(&o2hb_live_lock);
+ BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB));
+
event->hn_event_type = type;
event->hn_node = node;
event->hn_node_num = node_num;
@@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
o2nm_node_put(node);
}
+static void o2hb_set_quorum_device(struct o2hb_region *reg,
+ struct o2hb_disk_slot *slot)
+{
+ assert_spin_locked(&o2hb_live_lock);
+
+ if (!o2hb_global_heartbeat_active())
+ return;
+
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ return;
+
+ /*
+ * A region can be added to the quorum only when it sees all
+ * live nodes heartbeat on it. In other words, the region has been
+ * added to all nodes.
+ */
+ if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
+ sizeof(o2hb_live_node_bitmap)))
+ return;
+
+ if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD)
+ return;
+
+ printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n",
+ config_item_name(&reg->hr_item));
+
+ set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+}
+
static int o2hb_check_slot(struct o2hb_region *reg,
struct o2hb_disk_slot *slot)
{
@@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg,
u64 cputime;
unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
unsigned int slot_dead_ms;
+ int tmp;
memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
- /* Is this correct? Do we assume that the node doesn't exist
- * if we're not configured for him? */
+ /*
+ * If a node is no longer configured but is still in the livemap, we
+ * may need to clear that bit from the livemap.
+ */
node = o2nm_get_node_by_num(slot->ds_node_num);
- if (!node)
- return 0;
+ if (!node) {
+ spin_lock(&o2hb_live_lock);
+ tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap);
+ spin_unlock(&o2hb_live_lock);
+ if (!tmp)
+ return 0;
+ }
if (!o2hb_verify_crc(reg, hb_block)) {
/* all paths from here will drop o2hb_live_lock for
@@ -639,8 +797,12 @@ fire_callbacks:
mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n",
slot->ds_node_num, (long long)slot->ds_last_generation);
+ set_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
+
/* first on the list generates a callback */
if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
+ mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes "
+ "bitmap\n", slot->ds_node_num);
set_bit(slot->ds_node_num, o2hb_live_node_bitmap);
o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node,
@@ -684,13 +846,18 @@ fire_callbacks:
mlog(ML_HEARTBEAT, "Node %d left my region\n",
slot->ds_node_num);
+ clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
+
/* last off the live_slot generates a callback */
list_del_init(&slot->ds_live_item);
if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
+ mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live "
+ "nodes bitmap\n", slot->ds_node_num);
clear_bit(slot->ds_node_num, o2hb_live_node_bitmap);
- o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node,
- slot->ds_node_num);
+ /* node can be null */
+ o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB,
+ node, slot->ds_node_num);
changed = 1;
}
@@ -706,11 +873,14 @@ fire_callbacks:
slot->ds_equal_samples = 0;
}
out:
+ o2hb_set_quorum_device(reg, slot);
+
spin_unlock(&o2hb_live_lock);
o2hb_run_event_list(&event);
- o2nm_node_put(node);
+ if (node)
+ o2nm_node_put(node);
return changed;
}
@@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
int i, ret, highest_node, change = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct o2hb_bio_wait_ctxt write_wc;
ret = o2nm_configured_node_map(configured_nodes,
@@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
return ret;
}
+ /*
+ * If a node is not configured but is in the livemap, we still need
+ * to read the slot so as to be able to remove it from the livemap.
+ */
+ o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+ i = -1;
+ while ((i = find_next_bit(live_node_bitmap,
+ O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
+ set_bit(i, configured_nodes);
+ }
+
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
if (highest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
@@ -917,21 +1099,59 @@ static int o2hb_thread(void *data)
#ifdef CONFIG_DEBUG_FS
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
+ struct o2hb_debug_buf *db = inode->i_private;
+ struct o2hb_region *reg;
unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
char *buf = NULL;
int i = -1;
int out = 0;
+ /* max_nodes should be the largest bitmap we pass here */
+ BUG_ON(sizeof(map) < db->db_size);
+
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto bail;
- o2hb_fill_node_map(map, sizeof(map));
+ switch (db->db_type) {
+ case O2HB_DB_TYPE_LIVENODES:
+ case O2HB_DB_TYPE_LIVEREGIONS:
+ case O2HB_DB_TYPE_QUORUMREGIONS:
+ case O2HB_DB_TYPE_FAILEDREGIONS:
+ spin_lock(&o2hb_live_lock);
+ memcpy(map, db->db_data, db->db_size);
+ spin_unlock(&o2hb_live_lock);
+ break;
+
+ case O2HB_DB_TYPE_REGION_LIVENODES:
+ spin_lock(&o2hb_live_lock);
+ reg = (struct o2hb_region *)db->db_data;
+ memcpy(map, reg->hr_live_node_bitmap, db->db_size);
+ spin_unlock(&o2hb_live_lock);
+ break;
+
+ case O2HB_DB_TYPE_REGION_NUMBER:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%d\n",
+ reg->hr_region_num);
+ goto done;
+
+ case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+ jiffies_to_msecs(jiffies -
+ reg->hr_last_timeout_start));
+ goto done;
+
+ default:
+ goto done;
+ }
- while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
+ while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len)
out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
out += snprintf(buf + out, PAGE_SIZE - out, "\n");
+done:
i_size_write(inode, out);
file->private_data = buf;
@@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = {
void o2hb_exit(void)
{
- if (o2hb_debug_livenodes)
- debugfs_remove(o2hb_debug_livenodes);
- if (o2hb_debug_dir)
- debugfs_remove(o2hb_debug_dir);
+ kfree(o2hb_db_livenodes);
+ kfree(o2hb_db_liveregions);
+ kfree(o2hb_db_quorumregions);
+ kfree(o2hb_db_failedregions);
+ debugfs_remove(o2hb_debug_failedregions);
+ debugfs_remove(o2hb_debug_quorumregions);
+ debugfs_remove(o2hb_debug_liveregions);
+ debugfs_remove(o2hb_debug_livenodes);
+ debugfs_remove(o2hb_debug_dir);
+}
+
+static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
+ struct o2hb_debug_buf **db, int db_len,
+ int type, int size, int len, void *data)
+{
+ *db = kmalloc(db_len, GFP_KERNEL);
+ if (!*db)
+ return NULL;
+
+ (*db)->db_type = type;
+ (*db)->db_size = size;
+ (*db)->db_len = len;
+ (*db)->db_data = data;
+
+ return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
+ &o2hb_debug_fops);
+}
+
+static int o2hb_debug_init(void)
+{
+ int ret = -ENOMEM;
+
+ o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
+ if (!o2hb_debug_dir) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES,
+ o2hb_debug_dir,
+ &o2hb_db_livenodes,
+ sizeof(*o2hb_db_livenodes),
+ O2HB_DB_TYPE_LIVENODES,
+ sizeof(o2hb_live_node_bitmap),
+ O2NM_MAX_NODES,
+ o2hb_live_node_bitmap);
+ if (!o2hb_debug_livenodes) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS,
+ o2hb_debug_dir,
+ &o2hb_db_liveregions,
+ sizeof(*o2hb_db_liveregions),
+ O2HB_DB_TYPE_LIVEREGIONS,
+ sizeof(o2hb_live_region_bitmap),
+ O2NM_MAX_REGIONS,
+ o2hb_live_region_bitmap);
+ if (!o2hb_debug_liveregions) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ o2hb_debug_quorumregions =
+ o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS,
+ o2hb_debug_dir,
+ &o2hb_db_quorumregions,
+ sizeof(*o2hb_db_quorumregions),
+ O2HB_DB_TYPE_QUORUMREGIONS,
+ sizeof(o2hb_quorum_region_bitmap),
+ O2NM_MAX_REGIONS,
+ o2hb_quorum_region_bitmap);
+ if (!o2hb_debug_quorumregions) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ o2hb_debug_failedregions =
+ o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS,
+ o2hb_debug_dir,
+ &o2hb_db_failedregions,
+ sizeof(*o2hb_db_failedregions),
+ O2HB_DB_TYPE_FAILEDREGIONS,
+ sizeof(o2hb_failed_region_bitmap),
+ O2NM_MAX_REGIONS,
+ o2hb_failed_region_bitmap);
+ if (!o2hb_debug_failedregions) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ ret = 0;
+bail:
+ if (ret)
+ o2hb_exit();
+
+ return ret;
}
int o2hb_init(void)
@@ -997,24 +1311,12 @@ int o2hb_init(void)
INIT_LIST_HEAD(&o2hb_node_events);
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
+ memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
+ memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
+ memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
+ memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
- o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
- if (!o2hb_debug_dir) {
- mlog_errno(-ENOMEM);
- return -ENOMEM;
- }
-
- o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
- S_IFREG|S_IRUSR,
- o2hb_debug_dir, NULL,
- &o2hb_debug_fops);
- if (!o2hb_debug_livenodes) {
- mlog_errno(-ENOMEM);
- debugfs_remove(o2hb_debug_dir);
- return -ENOMEM;
- }
-
- return 0;
+ return o2hb_debug_init();
}
/* if we're already in a callback then we're already serialized by the sem */
@@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item)
if (reg->hr_slots)
kfree(reg->hr_slots);
+ kfree(reg->hr_db_regnum);
+ kfree(reg->hr_db_livenodes);
+ debugfs_remove(reg->hr_debug_livenodes);
+ debugfs_remove(reg->hr_debug_regnum);
+ debugfs_remove(reg->hr_debug_elapsed_time);
+ debugfs_remove(reg->hr_debug_dir);
+
spin_lock(&o2hb_live_lock);
list_del(&reg->hr_all_item);
spin_unlock(&o2hb_live_lock);
@@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
/* Ok, we were woken. Make sure it wasn't by drop_item() */
spin_lock(&o2hb_live_lock);
hb_task = reg->hr_task;
+ if (o2hb_global_heartbeat_active())
+ set_bit(reg->hr_region_num, o2hb_live_region_bitmap);
spin_unlock(&o2hb_live_lock);
if (hb_task)
@@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
else
ret = -EIO;
+ if (hb_task && o2hb_global_heartbeat_active())
+ printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n",
+ config_item_name(&reg->hr_item));
+
out:
if (filp)
fput(filp);
@@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
: NULL;
}
+static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
+{
+ int ret = -ENOMEM;
+
+ reg->hr_debug_dir =
+ debugfs_create_dir(config_item_name(&reg->hr_item), dir);
+ if (!reg->hr_debug_dir) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ reg->hr_debug_livenodes =
+ o2hb_debug_create(O2HB_DEBUG_LIVENODES,
+ reg->hr_debug_dir,
+ &(reg->hr_db_livenodes),
+ sizeof(*(reg->hr_db_livenodes)),
+ O2HB_DB_TYPE_REGION_LIVENODES,
+ sizeof(reg->hr_live_node_bitmap),
+ O2NM_MAX_NODES, reg);
+ if (!reg->hr_debug_livenodes) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ reg->hr_debug_regnum =
+ o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
+ reg->hr_debug_dir,
+ &(reg->hr_db_regnum),
+ sizeof(*(reg->hr_db_regnum)),
+ O2HB_DB_TYPE_REGION_NUMBER,
+ 0, O2NM_MAX_NODES, reg);
+ if (!reg->hr_debug_regnum) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ reg->hr_debug_elapsed_time =
+ o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
+ reg->hr_debug_dir,
+ &(reg->hr_db_elapsed_time),
+ sizeof(*(reg->hr_db_elapsed_time)),
+ O2HB_DB_TYPE_REGION_ELAPSED_TIME,
+ 0, 0, reg);
+ if (!reg->hr_debug_elapsed_time) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ ret = 0;
+bail:
+ return ret;
+}
+
static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
const char *name)
{
struct o2hb_region *reg = NULL;
+ int ret;
reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
if (reg == NULL)
return ERR_PTR(-ENOMEM);
- config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
+ if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
spin_lock(&o2hb_live_lock);
+ reg->hr_region_num = 0;
+ if (o2hb_global_heartbeat_active()) {
+ reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap,
+ O2NM_MAX_REGIONS);
+ if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
+ spin_unlock(&o2hb_live_lock);
+ return ERR_PTR(-EFBIG);
+ }
+ set_bit(reg->hr_region_num, o2hb_region_bitmap);
+ }
list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
spin_unlock(&o2hb_live_lock);
+ config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
+
+ ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
+ if (ret) {
+ config_item_put(&reg->hr_item);
+ return ERR_PTR(ret);
+ }
+
return &reg->hr_item;
}
@@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
/* stop the thread when the user removes the region dir */
spin_lock(&o2hb_live_lock);
+ if (o2hb_global_heartbeat_active()) {
+ clear_bit(reg->hr_region_num, o2hb_region_bitmap);
+ clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+ }
hb_task = reg->hr_task;
reg->hr_task = NULL;
spin_unlock(&o2hb_live_lock);
@@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
wake_up(&o2hb_steady_queue);
}
+ if (o2hb_global_heartbeat_active())
+ printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
+ config_item_name(&reg->hr_item));
config_item_put(item);
}
@@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group
return count;
}
+static
+ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group,
+ char *page)
+{
+ return sprintf(page, "%s\n",
+ o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]);
+}
+
+static
+ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group,
+ const char *page, size_t count)
+{
+ unsigned int i;
+ int ret;
+ size_t len;
+
+ len = (page[count - 1] == '\n') ? count - 1 : count;
+ if (!len)
+ return -EINVAL;
+
+ for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) {
+ if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len))
+ continue;
+
+ ret = o2hb_global_hearbeat_mode_set(i);
+ if (!ret)
+ printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n",
+ o2hb_heartbeat_mode_desc[i]);
+ return count;
+ }
+
+ return -EINVAL;
+
+}
+
static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "dead_threshold",
@@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold
.store = o2hb_heartbeat_group_threshold_store,
};
+static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = {
+ .attr = { .ca_owner = THIS_MODULE,
+ .ca_name = "mode",
+ .ca_mode = S_IRUGO | S_IWUSR },
+ .show = o2hb_heartbeat_group_mode_show,
+ .store = o2hb_heartbeat_group_mode_store,
+};
+
static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
&o2hb_heartbeat_group_attr_threshold.attr,
+ &o2hb_heartbeat_group_attr_mode.attr,
NULL,
};
@@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void)
spin_unlock(&o2hb_live_lock);
}
EXPORT_SYMBOL_GPL(o2hb_stop_all_regions);
+
+int o2hb_get_all_regions(char *region_uuids, u8 max_regions)
+{
+ struct o2hb_region *reg;
+ int numregs = 0;
+ char *p;
+
+ spin_lock(&o2hb_live_lock);
+
+ p = region_uuids;
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ mlog(0, "Region: %s\n", config_item_name(&reg->hr_item));
+ if (numregs < max_regions) {
+ memcpy(p, config_item_name(&reg->hr_item),
+ O2HB_MAX_REGION_NAME_LEN);
+ p += O2HB_MAX_REGION_NAME_LEN;
+ }
+ numregs++;
+ }
+
+ spin_unlock(&o2hb_live_lock);
+
+ return numregs;
+}
+EXPORT_SYMBOL_GPL(o2hb_get_all_regions);
+
+int o2hb_global_heartbeat_active(void)
+{
+ return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL);
+}
+EXPORT_SYMBOL(o2hb_global_heartbeat_active);
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 2f1649253b4..00ad8e8fea5 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -31,6 +31,8 @@
#define O2HB_REGION_TIMEOUT_MS 2000
+#define O2HB_MAX_REGION_NAME_LEN 32
+
/* number of changes to be seen as live */
#define O2HB_LIVE_THRESHOLD 2
/* number of equal samples to be seen as dead */
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
void o2hb_stop_all_regions(void);
+int o2hb_get_all_regions(char *region_uuids, u8 numregions);
+int o2hb_global_heartbeat_active(void);
#endif /* O2CLUSTER_HEARTBEAT_H */
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index fd96e2a2fa5..ea2ed9f56c9 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,7 +119,8 @@
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
-#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
+#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
+#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index ed0c9f367fe..bb240647ca5 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
spin_lock_init(&node->nd_lock);
+ mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name);
+
return &node->nd_item;
}
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group,
}
write_unlock(&cluster->cl_nodes_lock);
+ mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n",
+ config_item_name(&node->nd_item));
+
config_item_put(item);
}
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h
index 5b9854bad57..49b594325be 100644
--- a/fs/ocfs2/cluster/ocfs2_nodemanager.h
+++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h
@@ -36,4 +36,10 @@
/* host name, group name, cluster name all 64 bytes */
#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
+/*
+ * Maximum number of global heartbeat regions allowed.
+ * **CAUTION** Changing this number will break dlm compatibility.
+ */
+#define O2NM_MAX_REGIONS 32
+
#endif /* _OCFS2_NODEMANAGER_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997cf20..9aa426e4212 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
size_t caller_veclen, u8 target_node, int *status)
{
- int ret;
+ int ret = 0;
struct o2net_msg *msg = NULL;
size_t veclen, caller_bytes = 0;
struct kvec *vec = NULL;
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
{
o2quo_hb_down(node_num);
+ if (!node)
+ return;
+
if (node_num != o2nm_this_node())
o2net_disconnect_node(node);
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
o2quo_hb_up(node_num);
+ BUG_ON(!node);
+
/* ensure an immediate connect attempt */
nn->nn_last_connect_attempt = jiffies -
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcfffc4..c49f6de0e7a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
goto out_commit;
}
+ cpos = split_hash;
+ ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
+ data_ac, meta_ac, new_dx_leaves,
+ num_dx_leaves);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
for (i = 0; i < num_dx_leaves; i++) {
ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
mlog_errno(ret);
goto out_commit;
}
- }
- cpos = split_hash;
- ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
- data_ac, meta_ac, new_dx_leaves,
- num_dx_leaves);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
+ ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+ new_dx_leaves[i],
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
}
ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c13b4..b36d0bf77a5 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -445,7 +445,9 @@ enum {
DLM_LOCK_REQUEST_MSG, /* 515 */
DLM_RECO_DATA_DONE_MSG, /* 516 */
DLM_BEGIN_RECO_MSG, /* 517 */
- DLM_FINALIZE_RECO_MSG /* 518 */
+ DLM_FINALIZE_RECO_MSG, /* 518 */
+ DLM_QUERY_REGION, /* 519 */
+ DLM_QUERY_NODEINFO, /* 520 */
};
struct dlm_reco_node_data
@@ -727,6 +729,31 @@ struct dlm_cancel_join
u8 domain[O2NM_MAX_NAME_LEN];
};
+struct dlm_query_region {
+ u8 qr_node;
+ u8 qr_numregions;
+ u8 qr_namelen;
+ u8 pad1;
+ u8 qr_domain[O2NM_MAX_NAME_LEN];
+ u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS];
+};
+
+struct dlm_node_info {
+ u8 ni_nodenum;
+ u8 pad1;
+ u16 ni_ipv4_port;
+ u32 ni_ipv4_address;
+};
+
+struct dlm_query_nodeinfo {
+ u8 qn_nodenum;
+ u8 qn_numnodes;
+ u8 qn_namelen;
+ u8 pad1;
+ u8 qn_domain[O2NM_MAX_NAME_LEN];
+ struct dlm_node_info qn_nodes[O2NM_MAX_NODES];
+};
+
struct dlm_exit_domain
{
u8 node_idx;
@@ -1030,6 +1057,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void dlm_clean_master_list(struct dlm_ctxt *dlm,
u8 dead_node);
+void dlm_force_free_mles(struct dlm_ctxt *dlm);
int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 26ff2e185b1..272ec8631a5 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
spin_lock(&dlm->track_lock);
if (oldres)
track_list = &oldres->tracking;
- else
+ else {
track_list = &dlm->tracking_list;
+ if (list_empty(track_list)) {
+ dl = NULL;
+ spin_unlock(&dlm->track_lock);
+ goto bail;
+ }
+ }
list_for_each_entry(res, track_list, tracking) {
if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
} else
dl = NULL;
+bail:
/* passed to seq_show */
return dl;
}
@@ -775,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
out += snprintf(db->buf + out, db->len - out,
- "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key);
+ "Domain: %s Key: 0x%08x Protocol: %d.%d\n",
+ dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
+ dlm->dlm_locking_proto.pv_minor);
/* Thread Pid: xxx Node: xxx State: xxxxx */
out += snprintf(db->buf + out, db->len - out,
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5abef..58a93b95373 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
* will have a negotiated version with the same major number and a minor
* number equal or smaller. The dlm_ctxt->dlm_locking_proto field should
* be used to determine what a running domain is actually using.
+ *
+ * New in version 1.1:
+ * - Message DLM_QUERY_REGION added to support global heartbeat
+ * - Message DLM_QUERY_NODEINFO added to allow online node removes
*/
static const struct dlm_protocol_version dlm_protocol = {
.pv_major = 1,
- .pv_minor = 0,
+ .pv_minor = 1,
};
#define DLM_DOMAIN_BACKOFF_MS 200
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
+static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
+ void *data, void **ret_data);
static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_protocol_compare(struct dlm_protocol_version *existing,
@@ -693,6 +699,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
dlm_mark_domain_leaving(dlm);
dlm_leave_domain(dlm);
+ dlm_force_free_mles(dlm);
dlm_complete_dlm_shutdown(dlm);
}
dlm_put(dlm);
@@ -920,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
return 0;
}
+static int dlm_match_regions(struct dlm_ctxt *dlm,
+ struct dlm_query_region *qr)
+{
+ char *local = NULL, *remote = qr->qr_regions;
+ char *l, *r;
+ int localnr, i, j, foundit;
+ int status = 0;
+
+ if (!o2hb_global_heartbeat_active()) {
+ if (qr->qr_numregions) {
+ mlog(ML_ERROR, "Domain %s: Joining node %d has global "
+ "heartbeat enabled but local node %d does not\n",
+ qr->qr_domain, qr->qr_node, dlm->node_num);
+ status = -EINVAL;
+ }
+ goto bail;
+ }
+
+ if (o2hb_global_heartbeat_active() && !qr->qr_numregions) {
+ mlog(ML_ERROR, "Domain %s: Local node %d has global "
+ "heartbeat enabled but joining node %d does not\n",
+ qr->qr_domain, dlm->node_num, qr->qr_node);
+ status = -EINVAL;
+ goto bail;
+ }
+
+ r = remote;
+ for (i = 0; i < qr->qr_numregions; ++i) {
+ mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r);
+ r += O2HB_MAX_REGION_NAME_LEN;
+ }
+
+ local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+ if (!local) {
+ status = -ENOMEM;
+ goto bail;
+ }
+
+ localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
+
+ /* compare local regions with remote */
+ l = local;
+ for (i = 0; i < localnr; ++i) {
+ foundit = 0;
+ r = remote;
+ for (j = 0; j <= qr->qr_numregions; ++j) {
+ if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) {
+ foundit = 1;
+ break;
+ }
+ r += O2HB_MAX_REGION_NAME_LEN;
+ }
+ if (!foundit) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
+ "in local node %d but not in joining node %d\n",
+ qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l,
+ dlm->node_num, qr->qr_node);
+ goto bail;
+ }
+ l += O2HB_MAX_REGION_NAME_LEN;
+ }
+
+ /* compare remote with local regions */
+ r = remote;
+ for (i = 0; i < qr->qr_numregions; ++i) {
+ foundit = 0;
+ l = local;
+ for (j = 0; j < localnr; ++j) {
+ if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) {
+ foundit = 1;
+ break;
+ }
+ l += O2HB_MAX_REGION_NAME_LEN;
+ }
+ if (!foundit) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
+ "in joining node %d but not in local node %d\n",
+ qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r,
+ qr->qr_node, dlm->node_num);
+ goto bail;
+ }
+ r += O2HB_MAX_REGION_NAME_LEN;
+ }
+
+bail:
+ kfree(local);
+
+ return status;
+}
+
+static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
+{
+ struct dlm_query_region *qr = NULL;
+ int status, ret = 0, i;
+ char *p;
+
+ if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ goto bail;
+
+ qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
+ if (!qr) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ qr->qr_node = dlm->node_num;
+ qr->qr_namelen = strlen(dlm->name);
+ memcpy(qr->qr_domain, dlm->name, qr->qr_namelen);
+ /* if local hb, the numregions will be zero */
+ if (o2hb_global_heartbeat_active())
+ qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions,
+ O2NM_MAX_REGIONS);
+
+ p = qr->qr_regions;
+ for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN)
+ mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p);
+
+ i = -1;
+ while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
+ i + 1)) < O2NM_MAX_NODES) {
+ if (i == dlm->node_num)
+ continue;
+
+ mlog(0, "Sending regions to node %d\n", i);
+
+ ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr,
+ sizeof(struct dlm_query_region),
+ i, &status);
+ if (ret >= 0)
+ ret = status;
+ if (ret) {
+ mlog(ML_ERROR, "Region mismatch %d, node %d\n",
+ ret, i);
+ break;
+ }
+ }
+
+bail:
+ kfree(qr);
+ return ret;
+}
+
+static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
+ void *data, void **ret_data)
+{
+ struct dlm_query_region *qr;
+ struct dlm_ctxt *dlm = NULL;
+ int status = 0;
+ int locked = 0;
+
+ qr = (struct dlm_query_region *) msg->buf;
+
+ mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
+ qr->qr_domain);
+
+ status = -EINVAL;
+
+ spin_lock(&dlm_domain_lock);
+ dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen);
+ if (!dlm) {
+ mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
+ "before join domain\n", qr->qr_node, qr->qr_domain);
+ goto bail;
+ }
+
+ spin_lock(&dlm->spinlock);
+ locked = 1;
+ if (dlm->joining_node != qr->qr_node) {
+ mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
+ "but joining node is %d\n", qr->qr_node, qr->qr_domain,
+ dlm->joining_node);
+ goto bail;
+ }
+
+ /* Support for global heartbeat was added in 1.1 */
+ if (dlm->dlm_locking_proto.pv_major == 1 &&
+ dlm->dlm_locking_proto.pv_minor == 0) {
+ mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
+ "but active dlm protocol is %d.%d\n", qr->qr_node,
+ qr->qr_domain, dlm->dlm_locking_proto.pv_major,
+ dlm->dlm_locking_proto.pv_minor);
+ goto bail;
+ }
+
+ status = dlm_match_regions(dlm, qr);
+
+bail:
+ if (locked)
+ spin_unlock(&dlm->spinlock);
+ spin_unlock(&dlm_domain_lock);
+
+ return status;
+}
+
+static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn)
+{
+ struct o2nm_node *local;
+ struct dlm_node_info *remote;
+ int i, j;
+ int status = 0;
+
+ for (j = 0; j < qn->qn_numnodes; ++j)
+ mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum,
+ &(qn->qn_nodes[j].ni_ipv4_address),
+ ntohs(qn->qn_nodes[j].ni_ipv4_port));
+
+ for (i = 0; i < O2NM_MAX_NODES && !status; ++i) {
+ local = o2nm_get_node_by_num(i);
+ remote = NULL;
+ for (j = 0; j < qn->qn_numnodes; ++j) {
+ if (qn->qn_nodes[j].ni_nodenum == i) {
+ remote = &(qn->qn_nodes[j]);
+ break;
+ }
+ }
+
+ if (!local && !remote)
+ continue;
+
+ if ((local && !remote) || (!local && remote))
+ status = -EINVAL;
+
+ if (!status &&
+ ((remote->ni_nodenum != local->nd_num) ||
+ (remote->ni_ipv4_port != local->nd_ipv4_port) ||
+ (remote->ni_ipv4_address != local->nd_ipv4_address)))
+ status = -EINVAL;
+
+ if (status) {
+ if (remote && !local)
+ mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
+ "registered in joining node %d but not in "
+ "local node %d\n", qn->qn_domain,
+ remote->ni_nodenum,
+ &(remote->ni_ipv4_address),
+ ntohs(remote->ni_ipv4_port),
+ qn->qn_nodenum, dlm->node_num);
+ if (local && !remote)
+ mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
+ "registered in local node %d but not in "
+ "joining node %d\n", qn->qn_domain,
+ local->nd_num, &(local->nd_ipv4_address),
+ ntohs(local->nd_ipv4_port),
+ dlm->node_num, qn->qn_nodenum);
+ BUG_ON((!local && !remote));
+ }
+
+ if (local)
+ o2nm_node_put(local);
+ }
+
+ return status;
+}
+
+static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
+{
+ struct dlm_query_nodeinfo *qn = NULL;
+ struct o2nm_node *node;
+ int ret = 0, status, count, i;
+
+ if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ goto bail;
+
+ qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
+ if (!qn) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) {
+ node = o2nm_get_node_by_num(i);
+ if (!node)
+ continue;
+ qn->qn_nodes[count].ni_nodenum = node->nd_num;
+ qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port;
+ qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address;
+ mlog(0, "Node %3d, %pI4:%u\n", node->nd_num,
+ &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port));
+ ++count;
+ o2nm_node_put(node);
+ }
+
+ qn->qn_nodenum = dlm->node_num;
+ qn->qn_numnodes = count;
+ qn->qn_namelen = strlen(dlm->name);
+ memcpy(qn->qn_domain, dlm->name, qn->qn_namelen);
+
+ i = -1;
+ while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
+ i + 1)) < O2NM_MAX_NODES) {
+ if (i == dlm->node_num)
+ continue;
+
+ mlog(0, "Sending nodeinfo to node %d\n", i);
+
+ ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
+ qn, sizeof(struct dlm_query_nodeinfo),
+ i, &status);
+ if (ret >= 0)
+ ret = status;
+ if (ret) {
+ mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i);
+ break;
+ }
+ }
+
+bail:
+ kfree(qn);
+ return ret;
+}
+
+static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
+ void *data, void **ret_data)
+{
+ struct dlm_query_nodeinfo *qn;
+ struct dlm_ctxt *dlm = NULL;
+ int locked = 0, status = -EINVAL;
+
+ qn = (struct dlm_query_nodeinfo *) msg->buf;
+
+ mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum,
+ qn->qn_domain);
+
+ spin_lock(&dlm_domain_lock);
+ dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen);
+ if (!dlm) {
+ mlog(ML_ERROR, "Node %d queried nodes on domain %s before "
+ "join domain\n", qn->qn_nodenum, qn->qn_domain);
+ goto bail;
+ }
+
+ spin_lock(&dlm->spinlock);
+ locked = 1;
+ if (dlm->joining_node != qn->qn_nodenum) {
+ mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
+ "joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
+ dlm->joining_node);
+ goto bail;
+ }
+
+ /* Support for node query was added in 1.1 */
+ if (dlm->dlm_locking_proto.pv_major == 1 &&
+ dlm->dlm_locking_proto.pv_minor == 0) {
+ mlog(ML_ERROR, "Node %d queried nodes on domain %s "
+ "but active dlm protocol is %d.%d\n", qn->qn_nodenum,
+ qn->qn_domain, dlm->dlm_locking_proto.pv_major,
+ dlm->dlm_locking_proto.pv_minor);
+ goto bail;
+ }
+
+ status = dlm_match_nodes(dlm, qn);
+
+bail:
+ if (locked)
+ spin_unlock(&dlm->spinlock);
+ spin_unlock(&dlm_domain_lock);
+
+ return status;
+}
+
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data)
{
@@ -1240,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
set_bit(dlm->node_num, dlm->domain_map);
spin_unlock(&dlm->spinlock);
+ /* Support for global heartbeat and node info was added in 1.1 */
+ if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
+ status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ status = dlm_send_regions(dlm, ctxt->yes_resp_map);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
/* Joined state *must* be set before the joining node
@@ -1806,7 +2191,21 @@ static int dlm_register_net_handlers(void)
sizeof(struct dlm_cancel_join),
dlm_cancel_join_handler,
NULL, NULL, &dlm_join_handlers);
+ if (status)
+ goto bail;
+
+ status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY,
+ sizeof(struct dlm_query_region),
+ dlm_query_region_handler,
+ NULL, NULL, &dlm_join_handlers);
+ if (status)
+ goto bail;
+
+ status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
+ sizeof(struct dlm_query_nodeinfo),
+ dlm_query_nodeinfo_handler,
+ NULL, NULL, &dlm_join_handlers);
bail:
if (status < 0)
dlm_unregister_net_handlers();
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68dafa..f564b0e5f80 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
wake_up(&res->wq);
wake_up(&dlm->migration_wq);
}
+
+void dlm_force_free_mles(struct dlm_ctxt *dlm)
+{
+ int i;
+ struct hlist_head *bucket;
+ struct dlm_master_list_entry *mle;
+ struct hlist_node *tmp, *list;
+
+ /*
+ * We notified all other nodes that we are exiting the domain and
+ * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
+ * around we force free them and wake any processes that are waiting
+ * on the mles
+ */
+ spin_lock(&dlm->spinlock);
+ spin_lock(&dlm->master_lock);
+
+ BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
+ BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+
+ for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+ bucket = dlm_master_hash(dlm, i);
+ hlist_for_each_safe(list, tmp, bucket) {
+ mle = hlist_entry(list, struct dlm_master_list_entry,
+ master_hash_node);
+ if (mle->type != DLM_MLE_BLOCK) {
+ mlog(ML_ERROR, "bad mle: %p\n", mle);
+ dlm_print_one_mle(mle);
+ }
+ atomic_set(&mle->woken, 1);
+ wake_up(&mle->wq);
+
+ __dlm_unlink_mle(dlm, mle);
+ __dlm_mle_detach_hb_events(dlm, mle);
+ __dlm_put_mle(mle);
+ }
+ }
+ spin_unlock(&dlm->master_lock);
+ spin_unlock(&dlm->spinlock);
+}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e1b3d..1d596d8c4a4 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
OI_LS_PARENT,
OI_LS_RENAME1,
OI_LS_RENAME2,
+ OI_LS_REFLINK_TARGET,
};
int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3064feef143..d8408217e3b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -250,7 +250,7 @@ enum ocfs2_local_alloc_state
enum ocfs2_mount_options
{
- OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */
+ OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */
OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */
OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */
OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
@@ -263,9 +263,10 @@ enum ocfs2_mount_options
control lists */
OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
-
- OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT
- writes */
+ OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT
+ writes */
+ OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
+ OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -379,6 +380,8 @@ struct ocfs2_super
struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
+ u8 osb_stackflags;
+
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres;
@@ -612,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
return ret;
}
-static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
+static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb)
{
return (osb->s_feature_incompat &
- OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK);
+ (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK |
+ OCFS2_FEATURE_INCOMPAT_CLUSTERINFO));
+}
+
+static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
+{
+ if (ocfs2_clusterinfo_valid(osb) &&
+ memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ return 1;
+ return 0;
+}
+
+static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb)
+{
+ if (ocfs2_clusterinfo_valid(osb) &&
+ !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ return 1;
+ return 0;
+}
+
+static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
+{
+ return ocfs2_o2cb_stack(osb) &&
+ (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT);
}
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 723b20dac41..c2e4f8222e2 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -101,7 +101,8 @@
| OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
- | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
+ | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
+ | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -170,6 +171,13 @@
#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
/*
+ * Incompat bit to indicate useable clusterinfo with stackflags for all
+ * cluster stacks (userspace adnd o2cb). If this bit is set,
+ * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set.
+ */
+#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
+
+/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
*/
@@ -235,18 +243,31 @@
#define OCFS2_HAS_REFCOUNT_FL (0x0010)
/* Inode attributes, keep in sync with EXT2 */
-#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
-#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
-#define OCFS2_COMPR_FL (0x00000004) /* Compress file */
-#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */
-#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */
-#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */
-#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */
-#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */
-#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */
-
-#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */
-#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */
+#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */
+#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */
+#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */
+#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
+#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
+#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
+#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
+/* Reserved for compression usage... */
+#define OCFS2_DIRTY_FL FS_DIRTY_FL
+#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
+#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
+#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
+/* End compression flags --- maybe not all used */
+#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */
+#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
+#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
+#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
+#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
+#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
+#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
+
+#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
+#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
/*
* Extent record flags (e_node.leaf.flags)
@@ -279,10 +300,13 @@
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
-/* The alternate, userspace stack fields */
+/* The cluster stack fields */
#define OCFS2_STACK_LABEL_LEN 4
#define OCFS2_CLUSTER_NAME_LEN 16
+/* Classic (historically speaking) cluster stack */
+#define OCFS2_CLASSIC_CLUSTER_STACK "o2cb"
+
/* Journal limits (in bytes) */
#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
@@ -292,6 +316,11 @@
*/
#define OCFS2_MIN_XATTR_INLINE_SIZE 256
+/*
+ * Cluster info flags (ocfs2_cluster_info.ci_stackflags)
+ */
+#define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01)
+
struct ocfs2_system_inode_info {
char *si_name;
int si_iflags;
@@ -352,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
/* Parameter passed from mount.ocfs2 to module */
#define OCFS2_HB_NONE "heartbeat=none"
#define OCFS2_HB_LOCAL "heartbeat=local"
+#define OCFS2_HB_GLOBAL "heartbeat=global"
/*
* OCFS2 directory file types. Only the low 3 bits are used. The
@@ -558,9 +588,21 @@ struct ocfs2_slot_map_extended {
*/
};
+/*
+ * ci_stackflags is only valid if the incompat bit
+ * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set.
+ */
struct ocfs2_cluster_info {
/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN];
- __le32 ci_reserved;
+ union {
+ __le32 ci_reserved;
+ struct {
+ __u8 ci_stackflags;
+ __u8 ci_reserved1;
+ __u8 ci_reserved2;
+ __u8 ci_reserved3;
+ };
+ };
/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN];
/*18*/
};
@@ -597,9 +639,9 @@ struct ocfs2_super_block {
* group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
-/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
- stack. Only valid
- with INCOMPAT flag. */
+/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either
+ userspace or clusterinfo
+ INCOMPAT flag set. */
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
__le16 s_reserved0;
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 9bc53549986..b46f39bf743 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -23,10 +23,10 @@
/*
* ioctl commands
*/
-#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
-#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long)
-#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int)
-#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
+#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
/*
* Space reservation / allocation / free ioctls and argument structure
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a120cfcf69b..b5f9160e93e 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4240,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out;
}
- mutex_lock(&new_inode->i_mutex);
- ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+ mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+ ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
+ OI_LS_REFLINK_TARGET);
if (ret) {
mlog_errno(ret);
goto out_unlock;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index d8b6e4259b8..3e78db361bc 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv,
int *cstart, int *clen)
{
- unsigned int wanted = *clen;
-
if (resv == NULL || ocfs2_resmap_disabled(resmap))
return -ENOSPC;
spin_lock(&resv_lock);
- /*
- * We don't want to over-allocate for temporary
- * windows. Otherwise, we run the risk of fragmenting the
- * allocation space.
- */
- wanted = ocfs2_resv_window_bits(resmap, resv);
- if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
- wanted = *clen;
-
if (ocfs2_resv_empty(resv)) {
- mlog(0, "empty reservation, find new window\n");
+ /*
+ * We don't want to over-allocate for temporary
+ * windows. Otherwise, we run the risk of fragmenting the
+ * allocation space.
+ */
+ unsigned int wanted = ocfs2_resv_window_bits(resmap, resv);
+ if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
+ wanted = *clen;
+
+ mlog(0, "empty reservation, find new window\n");
/*
* Try to get a window here. If it works, we must fall
* through and test the bitmap . This avoids some
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 0d3049f696c..19965b00c43 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
/* for now we only have one cluster/node, make sure we see it
* in the heartbeat universe */
if (!o2hb_check_local_node_heartbeating()) {
+ if (o2hb_global_heartbeat_active())
+ mlog(ML_ERROR, "Global heartbeat not started\n");
rc = -EINVAL;
goto out;
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 64f2c50a1c3..5fed60de763 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -357,7 +357,7 @@ out:
static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
struct ocfs2_group_desc *bg,
struct ocfs2_chain_list *cl,
- u64 p_blkno, u32 clusters)
+ u64 p_blkno, unsigned int clusters)
{
struct ocfs2_extent_list *el = &bg->bg_list;
struct ocfs2_extent_rec *rec;
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
rec->e_blkno = cpu_to_le64(p_blkno);
rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
le16_to_cpu(cl->cl_bpc));
- rec->e_leaf_clusters = cpu_to_le32(clusters);
+ rec->e_leaf_clusters = cpu_to_le16(clusters);
le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
le16_add_cpu(&bg->bg_free_bits_count,
clusters * le16_to_cpu(cl->cl_bpc));
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9122d59f812..a8a0ca44f88 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -162,6 +162,7 @@ enum {
Opt_nointr,
Opt_hb_none,
Opt_hb_local,
+ Opt_hb_global,
Opt_data_ordered,
Opt_data_writeback,
Opt_atime_quantum,
@@ -192,6 +193,7 @@ static const match_table_t tokens = {
{Opt_nointr, "nointr"},
{Opt_hb_none, OCFS2_HB_NONE},
{Opt_hb_local, OCFS2_HB_LOCAL},
+ {Opt_hb_global, OCFS2_HB_GLOBAL},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
{Opt_atime_quantum, "atime_quantum=%u"},
@@ -626,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
int ret = 0;
struct mount_options parsed_options;
struct ocfs2_super *osb = OCFS2_SB(sb);
+ u32 tmp;
lock_kernel();
@@ -635,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
- if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
- (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
+ tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE;
+ if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
goto out;
@@ -827,23 +831,29 @@ bail:
static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
{
- if (ocfs2_mount_local(osb)) {
- if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
+ u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL;
+
+ if (osb->s_mount_opt & hb_enabled) {
+ if (ocfs2_mount_local(osb)) {
mlog(ML_ERROR, "Cannot heartbeat on a locally "
"mounted device.\n");
return -EINVAL;
}
- }
-
- if (ocfs2_userspace_stack(osb)) {
- if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
+ if (ocfs2_userspace_stack(osb)) {
mlog(ML_ERROR, "Userspace stack expected, but "
"o2cb heartbeat arguments passed to mount\n");
return -EINVAL;
}
+ if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) &&
+ !ocfs2_cluster_o2cb_global_heartbeat(osb)) ||
+ ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) &&
+ ocfs2_cluster_o2cb_global_heartbeat(osb))) {
+ mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n");
+ return -EINVAL;
+ }
}
- if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
+ if (!(osb->s_mount_opt & hb_enabled)) {
if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
!ocfs2_userspace_stack(osb)) {
mlog(ML_ERROR, "Heartbeat has to be started to mount "
@@ -1309,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb,
{
int status;
char *p;
+ u32 tmp;
mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
options ? options : "(none)");
@@ -1340,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
break;
case Opt_hb_none:
- mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
+ mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
+ break;
+ case Opt_hb_global:
+ mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
break;
case Opt_barrier:
if (match_int(&args[0], &option)) {
@@ -1501,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb,
}
}
+ /* Ensure only one heartbeat mode */
+ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ status = 0;
+ goto bail;
+ }
+
status = 1;
bail:
@@ -1514,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
unsigned long opts = osb->s_mount_opt;
unsigned int local_alloc_megs;
- if (opts & OCFS2_MOUNT_HB_LOCAL)
- seq_printf(s, ",_netdev,heartbeat=local");
- else
- seq_printf(s, ",heartbeat=none");
+ if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) {
+ seq_printf(s, ",_netdev");
+ if (opts & OCFS2_MOUNT_HB_LOCAL)
+ seq_printf(s, ",%s", OCFS2_HB_LOCAL);
+ else
+ seq_printf(s, ",%s", OCFS2_HB_GLOBAL);
+ } else
+ seq_printf(s, ",%s", OCFS2_HB_NONE);
if (opts & OCFS2_MOUNT_NOINTR)
seq_printf(s, ",nointr");
@@ -2209,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
- if (ocfs2_userspace_stack(osb)) {
+ if (ocfs2_clusterinfo_valid(osb)) {
+ osb->osb_stackflags =
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
OCFS2_STACK_LABEL_LEN);
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 32499d213fc..9975457c981 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
}
/* Fast symlinks can't be large */
- len = strlen(target);
+ len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb));
link = kzalloc(len + 1, GFP_NOFS);
if (!link) {
status = -ENOMEM;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d03469f6180..06fa5e77c40 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
xis.inode_bh = xbs.inode_bh = di_bh;
di = (struct ocfs2_dinode *)di_bh->b_data;
- down_read(&oi->ip_xattr_sem);
ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
buffer_size, &xis);
if (ret == -ENODATA && di->i_xattr_loc)
ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
buffer_size, &xbs);
- up_read(&oi->ip_xattr_sem);
return ret;
}
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode,
mlog_errno(ret);
return ret;
}
+ down_read(&OCFS2_I(inode)->ip_xattr_sem);
ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
name, buffer, buffer_size);
+ up_read(&OCFS2_I(inode)->ip_xattr_sem);
ocfs2_inode_unlock(inode, 0);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1c43e7c8a7..8e4addaa542 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUSR, proc_pid_limits),
+ INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = {
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUSR, proc_pid_limits),
+ INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 180cf5a0bd6..3b8b4566033 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
#endif
-#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
#endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 271afc48b9a..1dbca4e8cc1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
mss->referenced += PAGE_SIZE;
mapcount = page_mapcount(page);
if (mapcount >= 2) {
- if (pte_dirty(ptent))
+ if (pte_dirty(ptent) || PageDirty(page))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
} else {
- if (pte_dirty(ptent))
+ if (pte_dirty(ptent) || PageDirty(page))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817ff02c..2367fb3f70b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
static const struct file_operations proc_vmcore_operations = {
.read = read_vmcore,
- .llseek = generic_file_llseek,
+ .llseek = default_llseek,
};
static struct vmcore* __init get_new_element(void)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index f53505de071..5cbb81e134a 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
int reiserfs_unpack(struct inode *inode, struct file *filp)
{
int retval = 0;
+ int depth;
int index;
struct page *page;
struct address_space *mapping;
@@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
/* we need to make sure nobody is changing the file size beneath
** us
*/
- mutex_lock(&inode->i_mutex);
- reiserfs_write_lock(inode->i_sb);
+ reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+ depth = reiserfs_write_lock_once(inode->i_sb);
write_from = inode->i_size & (blocksize - 1);
/* if we are on a block boundary, we are already unpacked. */
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
out:
mutex_unlock(&inode->i_mutex);
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, depth);
return retval;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d72cf2bb054..286e36e21da 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1932,7 +1932,8 @@ xfs_buf_init(void)
if (!xfs_buf_zone)
goto out;
- xfslogd_workqueue = create_workqueue("xfslogd");
+ xfslogd_workqueue = alloc_workqueue("xfslogd",
+ WQ_RESCUER | WQ_HIGHPRI, 1);
if (!xfslogd_workqueue)
goto out_free_buf_zone;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4fec427b83e..3b9e626f7cd 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
{
struct fsxattr fa;
+ memset(&fa, 0, sizeof(struct fsxattr));
+
xfs_ilock(ip, XFS_ILOCK_SHARED);
fa.fsx_xflags = xfs_ip2xflags(ip);
fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b49..7e206fc1fa3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
- /* lock out transaction commit, but don't block on background push */
+ /*
+ * Lock out transaction commit, but don't block for background pushes
+ * unless we are well over the CIL space limit. See the definition of
+ * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+ * used here.
+ */
if (!down_write_trylock(&cil->xc_ctx_lock)) {
- if (!push_seq)
+ if (!push_seq &&
+ cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
goto out_free_ticket;
down_write(&cil->xc_ctx_lock);
}
@@ -422,7 +428,7 @@ xlog_cil_push(
goto out_skip;
/* check for a previously pushed seqeunce */
- if (push_seq < cil->xc_ctx->sequence)
+ if (push_seq && push_seq < cil->xc_ctx->sequence)
goto out_skip;
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b32..edcdfe01617 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
};
/*
- * The amount of log space we should the CIL to aggregate is difficult to size.
- * Whatever we chose we have to make we can get a reservation for the log space
- * effectively, that it is large enough to capture sufficient relogging to
- * reduce log buffer IO significantly, but it is not too large for the log or
- * induces too much latency when writing out through the iclogs. We track both
- * space consumed and the number of vectors in the checkpoint context, so we
- * need to decide which to use for limiting.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
*
* Every log buffer we write out during a push needs a header reserved, which
* is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
* checkpoint transaction ticket is specific to the checkpoint context, rather
* than the CIL itself.
*
- * With dynamic reservations, we can basically make up arbitrary limits for the
- * checkpoint size so long as they don't violate any other size rules. Hence
- * the initial maximum size for the checkpoint transaction will be set to a
- * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
- * right now based on the latency of writing out a large amount of data through
- * the circular iclog buffers.
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that. Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits. A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds.
+ * threshold, yet give us plenty of space for aggregation on large logs.
*/
-
-#define XLOG_CIL_SPACE_LIMIT(log) \
- (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
+#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
/*
* The reservation head lsn is not made up of a cycle number and block number.