aboutsummaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/acl.c235
-rw-r--r--fs/ocfs2/acl.h13
-rw-r--r--fs/ocfs2/alloc.c61
-rw-r--r--fs/ocfs2/aops.c46
-rw-r--r--fs/ocfs2/aops.h5
-rw-r--r--fs/ocfs2/buffer_head_io.c6
-rw-r--r--fs/ocfs2/cluster/Makefile2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c44
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/cluster/nodemanager.c4
-rw-r--r--fs/ocfs2/cluster/sys.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c123
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h2
-rw-r--r--fs/ocfs2/cluster/ver.c42
-rw-r--r--fs/ocfs2/cluster/ver.h31
-rw-r--r--fs/ocfs2/dcache.c61
-rw-r--r--fs/ocfs2/dcache.h12
-rw-r--r--fs/ocfs2/dir.c18
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h5
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c46
-rw-r--r--fs/ocfs2/dlm/dlmlock.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c79
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c49
-rw-r--r--fs/ocfs2/dlm/dlmthread.c13
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c18
-rw-r--r--fs/ocfs2/dlm/dlmver.c42
-rw-r--r--fs/ocfs2/dlm/dlmver.h31
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.c42
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.h31
-rw-r--r--fs/ocfs2/dlmglue.c57
-rw-r--r--fs/ocfs2/dlmglue.h3
-rw-r--r--fs/ocfs2/file.c280
-rw-r--r--fs/ocfs2/inode.c71
-rw-r--r--fs/ocfs2/inode.h17
-rw-r--r--fs/ocfs2/ioctl.c93
-rw-r--r--fs/ocfs2/journal.c23
-rw-r--r--fs/ocfs2/journal.h14
-rw-r--r--fs/ocfs2/localalloc.c42
-rw-r--r--fs/ocfs2/localalloc.h6
-rw-r--r--fs/ocfs2/locks.c2
-rw-r--r--fs/ocfs2/move_extents.c95
-rw-r--r--fs/ocfs2/namei.c199
-rw-r--r--fs/ocfs2/ocfs2.h35
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/quota.h2
-rw-r--r--fs/ocfs2/quota_global.c62
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c54
-rw-r--r--fs/ocfs2/resize.c22
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/stack_user.c308
-rw-r--r--fs/ocfs2/stackglue.c50
-rw-r--r--fs/ocfs2/stackglue.h15
-rw-r--r--fs/ocfs2/suballoc.c45
-rw-r--r--fs/ocfs2/suballoc.h16
-rw-r--r--fs/ocfs2/super.c79
-rw-r--r--fs/ocfs2/sysfile.c3
-rw-r--r--fs/ocfs2/uptodate.c2
-rw-r--r--fs/ocfs2/ver.c43
-rw-r--r--fs/ocfs2/ver.h31
-rw-r--r--fs/ocfs2/xattr.c84
-rw-r--r--fs/ocfs2/xattr.h6
67 files changed, 1443 insertions, 1404 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index f17e58b3298..ce210d4951a 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -38,7 +38,6 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
- ver.o \
quota_local.o \
quota_global.o \
xattr.o \
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index b4f788e0ca3..7e8282dcea2 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -160,36 +160,6 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
return acl;
}
-
-/*
- * Get posix acl.
- */
-static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct buffer_head *di_bh = NULL;
- struct posix_acl *acl;
- int ret;
-
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return NULL;
-
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- acl = ERR_PTR(ret);
- return acl;
- }
-
- acl = ocfs2_get_acl_nolock(inode, type, di_bh);
-
- ocfs2_inode_unlock(inode, 0);
-
- brelse(di_bh);
-
- return acl;
-}
-
/*
* Helper function to set i_mode in memory and disk. Some call paths
* will not have di_bh or a journal handle to pass, in which case it
@@ -235,6 +205,7 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
di->i_mode = cpu_to_le16(inode->i_mode);
di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
@@ -250,7 +221,7 @@ out:
/*
* Set the access or default ACL of an inode.
*/
-static int ocfs2_set_acl(handle_t *handle,
+int ocfs2_set_acl(handle_t *handle,
struct inode *inode,
struct buffer_head *di_bh,
int type,
@@ -313,6 +284,11 @@ static int ocfs2_set_acl(handle_t *handle,
return ret;
}
+int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ return ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
+}
+
struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
{
struct ocfs2_super *osb;
@@ -334,200 +310,3 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
return acl;
}
-
-int ocfs2_acl_chmod(struct inode *inode)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct posix_acl *acl;
- int ret;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return 0;
-
- acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
- ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (ret)
- return ret;
- ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
- acl, NULL, NULL);
- posix_acl_release(acl);
- return ret;
-}
-
-/*
- * Initialize the ACLs of a new inode. If parent directory has default ACL,
- * then clone to new inode. Called from ocfs2_mknod.
- */
-int ocfs2_init_acl(handle_t *handle,
- struct inode *inode,
- struct inode *dir,
- struct buffer_head *di_bh,
- struct buffer_head *dir_bh,
- struct ocfs2_alloc_context *meta_ac,
- struct ocfs2_alloc_context *data_ac)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct posix_acl *acl = NULL;
- int ret = 0, ret2;
- umode_t mode;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
- acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
- dir_bh);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- if (!acl) {
- mode = inode->i_mode & ~current_umask();
- ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
- if (ret) {
- mlog_errno(ret);
- goto cleanup;
- }
- }
- }
- if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- ret = ocfs2_set_acl(handle, inode, di_bh,
- ACL_TYPE_DEFAULT, acl,
- meta_ac, data_ac);
- if (ret)
- goto cleanup;
- }
- mode = inode->i_mode;
- ret = posix_acl_create(&acl, GFP_NOFS, &mode);
- if (ret < 0)
- return ret;
-
- ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
- if (ret2) {
- mlog_errno(ret2);
- ret = ret2;
- goto cleanup;
- }
- if (ret > 0) {
- ret = ocfs2_set_acl(handle, inode,
- di_bh, ACL_TYPE_ACCESS,
- acl, meta_ac, data_ac);
- }
- }
-cleanup:
- posix_acl_release(acl);
- return ret;
-}
-
-static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
- char *list,
- size_t list_len,
- const char *name,
- size_t name_len,
- int type)
-{
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
-
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return 0;
-
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
- return size;
-}
-
-static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
- char *list,
- size_t list_len,
- const char *name,
- size_t name_len,
- int type)
-{
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
-
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return 0;
-
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
- return size;
-}
-
-static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
- void *buffer, size_t size, int type)
-{
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- struct posix_acl *acl;
- int ret;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return -EOPNOTSUPP;
-
- acl = ocfs2_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
- ret = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
-
- return ret;
-}
-
-static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- struct inode *inode = dentry->d_inode;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct posix_acl *acl;
- int ret = 0;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return -EOPNOTSUPP;
-
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- else if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto cleanup;
- }
- } else
- acl = NULL;
-
- ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
-
-cleanup:
- posix_acl_release(acl);
- return ret;
-}
-
-const struct xattr_handler ocfs2_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = ocfs2_xattr_list_acl_access,
- .get = ocfs2_xattr_get_acl,
- .set = ocfs2_xattr_set_acl,
-};
-
-const struct xattr_handler ocfs2_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .list = ocfs2_xattr_list_acl_default,
- .get = ocfs2_xattr_get_acl,
- .set = ocfs2_xattr_set_acl,
-};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 071fbd380f2..3fce68d0862 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -27,10 +27,13 @@ struct ocfs2_acl_entry {
};
struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type);
-extern int ocfs2_acl_chmod(struct inode *);
-extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
- struct buffer_head *, struct buffer_head *,
- struct ocfs2_alloc_context *,
- struct ocfs2_alloc_context *);
+int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ocfs2_set_acl(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ int type,
+ struct posix_acl *acl,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac);
#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 17e6bdde96c..9d8fcf2f3b9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1025,7 +1025,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
for(i = count; i < (num_got + count); i++) {
bhs[i] = sb_getblk(osb->sb, first_blkno);
if (bhs[i] == NULL) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
enum ocfs2_alloc_restarted *reason_ret)
{
int status = 0, err = 0;
+ int need_free = 0;
int free_extents;
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ need_free = 1;
+ goto bail;
}
block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
num_bits, flags, meta_ac);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ need_free = 1;
+ goto bail;
}
ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
reason = RESTART_TRANS;
}
+bail:
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num_bits);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num_bits);
+ }
+
leave:
if (reason_ret)
*reason_ret = reason;
@@ -5712,6 +5728,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
}
ocfs2_et_update_clusters(et, -len);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -6029,7 +6046,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
int cancel)
{
- if (osb->osb_tl_inode) {
+ if (osb->osb_tl_inode &&
+ atomic_read(&osb->osb_tl_disable) == 0) {
/* We want to push off log flushes while truncates are
* still running. */
if (cancel)
@@ -6206,6 +6224,8 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
int status;
struct inode *tl_inode = osb->osb_tl_inode;
+ atomic_set(&osb->osb_tl_disable, 1);
+
if (tl_inode) {
cancel_delayed_work(&osb->osb_truncate_log_wq);
flush_workqueue(ocfs2_wq);
@@ -6237,6 +6257,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
* until we're sure all is well. */
INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
ocfs2_truncate_log_worker);
+ atomic_set(&osb->osb_tl_disable, 0);
osb->osb_tl_bh = tl_bh;
osb->osb_tl_inode = tl_inode;
@@ -6805,6 +6826,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
int ret, i, has_data, num_pages = 0;
+ int need_free = 0;
+ u32 bit_off, num;
handle_t *handle;
u64 uninitialized_var(block);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6873,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
if (has_data) {
- u32 bit_off, num;
unsigned int page_end;
u64 phys;
@@ -6886,6 +6908,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6896,6 +6919,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6913,6 +6937,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_dinode_new_extent_list(inode, di);
ocfs2_journal_dirty(handle, di_bh);
@@ -6927,6 +6952,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6938,6 +6964,18 @@ out_commit:
dquot_free_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num);
+ }
+
ocfs2_commit_trans(osb, handle);
out_unlock:
@@ -7126,7 +7164,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
if (end > i_size_read(inode))
end = i_size_read(inode);
- BUG_ON(start >= end);
+ BUG_ON(start > end);
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
@@ -7176,6 +7214,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, di_bh);
out_commit:
@@ -7260,14 +7299,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
start = range->start >> osb->s_clustersize_bits;
len = range->len >> osb->s_clustersize_bits;
minlen = range->minlen >> osb->s_clustersize_bits;
- trimmed = 0;
- if (!len) {
- range->len = 0;
- return 0;
- }
-
- if (minlen >= osb->bitmap_cpg)
+ if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
return -EINVAL;
main_bm_inode = ocfs2_get_system_file_inode(osb,
@@ -7293,6 +7326,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
goto out_unlock;
}
+ len = range->len >> osb->s_clustersize_bits;
if (start + len > le32_to_cpu(main_bm->i_clusters))
len = le32_to_cpu(main_bm->i_clusters) - start;
@@ -7307,6 +7341,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
last_bit = osb->bitmap_cpg;
+ trimmed = 0;
for (group = first_group; group <= last_group;) {
if (first_bit + len >= osb->bitmap_cpg)
last_bit = osb->bitmap_cpg;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f37d3c0e205..4a231a166cf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -80,6 +80,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
+ err = -ENOMEM;
mlog(ML_ERROR, "block offset is outside the allocated size: "
"%llu\n", (unsigned long long)iblock);
goto bail;
@@ -92,6 +93,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
iblock;
buffer_cache_bh = sb_getblk(osb->sb, blkno);
if (!buffer_cache_bh) {
+ err = -ENOMEM;
mlog(ML_ERROR, "couldn't getblock for symlink!\n");
goto bail;
}
@@ -569,7 +571,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
{
struct inode *inode = file_inode(iocb->ki_filp);
int level;
- wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -580,10 +581,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
if (ocfs2_iocb_is_unaligned_aio(iocb)) {
ocfs2_iocb_clear_unaligned_aio(iocb);
- if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) &&
- waitqueue_active(wq)) {
- wake_up_all(wq);
- }
+ mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}
ocfs2_iocb_clear_rw_locked(iocb);
@@ -592,33 +590,17 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
ocfs2_rw_unlock(inode, level);
}
-/*
- * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen
- * from ext3. PageChecked() bits have been removed as OCFS2 does not
- * do journalled data.
- */
-static void ocfs2_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{
- journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
-
- jbd2_journal_invalidatepage(journal, page, offset, length);
-}
-
static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
- journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
-
if (!page_has_buffers(page))
return 0;
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ return try_to_free_buffers(page);
}
static ssize_t ocfs2_direct_IO(int rw,
struct kiocb *iocb,
- const struct iovec *iov,
- loff_t offset,
- unsigned long nr_segs)
+ struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file)->i_mapping->host;
@@ -635,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
return 0;
return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
- iov, offset, nr_segs,
+ iter, offset,
ocfs2_direct_IO_get_blocks,
ocfs2_dio_end_io, NULL, 0);
}
@@ -1802,8 +1784,7 @@ try_again:
data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
credits = ocfs2_calc_extend_credits(inode->i_sb,
- &di->id2.i_list,
- clusters_to_alloc);
+ &di->id2.i_list);
}
@@ -1897,10 +1878,14 @@ out_commit:
out:
ocfs2_free_write_ctxt(wc);
- if (data_ac)
+ if (data_ac) {
ocfs2_free_alloc_context(data_ac);
- if (meta_ac)
+ data_ac = NULL;
+ }
+ if (meta_ac) {
ocfs2_free_alloc_context(meta_ac);
+ meta_ac = NULL;
+ }
if (ret == -ENOSPC && try_free) {
/*
@@ -2053,6 +2038,7 @@ out_write_size:
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
ocfs2_commit_trans(osb, handle);
@@ -2087,7 +2073,7 @@ const struct address_space_operations ocfs2_aops = {
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.direct_IO = ocfs2_direct_IO,
- .invalidatepage = ocfs2_invalidatepage,
+ .invalidatepage = block_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index f671e49beb3..6cae155d54d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -102,9 +102,4 @@ enum ocfs2_iocb_lock_bits {
#define ocfs2_iocb_is_unaligned_aio(iocb) \
test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-#define OCFS2_IOEND_WQ_HASH_SZ 37
-#define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\
- OCFS2_IOEND_WQ_HASH_SZ])
-extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
-
#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 5d18ad10c27..1edcb141f63 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
* information for this bh as it's not marked locally
* uptodate. */
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
@@ -115,7 +114,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(osb->sb, block++);
if (bhs[i] == NULL) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -214,7 +213,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
ocfs2_metadata_cache_io_unlock(ci);
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
if (!buffer_uptodate(bh)) {
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile
index bc8c5e7d860..1aefc0350ec 100644
--- a/fs/ocfs2/cluster/Makefile
+++ b/fs/ocfs2/cluster/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
- quorum.o tcp.o netdebug.o ver.o
+ quorum.o tcp.o netdebug.o
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 363f0dcc924..73039295d0d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -35,6 +35,7 @@
#include <linux/time.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#include "heartbeat.h"
#include "tcp.h"
@@ -282,15 +283,6 @@ struct o2hb_bio_wait_ctxt {
int wc_error;
};
-static int o2hb_pop_count(void *map, int count)
-{
- int i = -1, pop = 0;
-
- while ((i = find_next_bit(map, count, i + 1)) < count)
- pop++;
- return pop;
-}
-
static void o2hb_write_timeout(struct work_struct *work)
{
int failed, quorum;
@@ -307,9 +299,9 @@ static void o2hb_write_timeout(struct work_struct *work)
spin_lock_irqsave(&o2hb_live_lock, flags);
if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
- failed = o2hb_pop_count(&o2hb_failed_region_bitmap,
+ failed = bitmap_weight(o2hb_failed_region_bitmap,
O2NM_MAX_REGIONS);
- quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ quorum = bitmap_weight(o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS);
spin_unlock_irqrestore(&o2hb_live_lock, flags);
@@ -421,7 +413,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
}
/* Must put everything in 512 byte sectors for the bio... */
- bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9);
+ bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
bio->bi_bdev = reg->hr_bdev;
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
@@ -765,7 +757,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg)
* If global heartbeat active, unpin all regions if the
* region count > CUT_OFF
*/
- if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ if (bitmap_weight(o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
o2hb_region_unpin(NULL);
unlock:
@@ -954,23 +946,9 @@ out:
return changed;
}
-/* This could be faster if we just implmented a find_last_bit, but I
- * don't think the circumstances warrant it. */
-static int o2hb_highest_node(unsigned long *nodes,
- int numbits)
+static int o2hb_highest_node(unsigned long *nodes, int numbits)
{
- int highest, node;
-
- highest = numbits;
- node = -1;
- while ((node = find_next_bit(nodes, numbits, node + 1)) != -1) {
- if (node >= numbits)
- break;
-
- highest = node;
- }
-
- return highest;
+ return find_last_bit(nodes, numbits);
}
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
@@ -1129,7 +1107,7 @@ static int o2hb_thread(void *data)
mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread running\n");
- set_user_nice(current, -20);
+ set_user_nice(current, MIN_NICE);
/* Pin node */
o2nm_depend_this_node();
@@ -1829,7 +1807,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
live_threshold = O2HB_LIVE_THRESHOLD;
if (o2hb_global_heartbeat_active()) {
spin_lock(&o2hb_live_lock);
- if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+ if (bitmap_weight(o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
live_threshold <<= 1;
spin_unlock(&o2hb_live_lock);
}
@@ -2180,7 +2158,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (!o2hb_dependent_users)
goto unlock;
- if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ if (bitmap_weight(o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
o2hb_region_pin(NULL);
@@ -2480,7 +2458,7 @@ static int o2hb_region_inc_user(const char *region_uuid)
if (o2hb_dependent_users > 1)
goto unlock;
- if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ if (bitmap_weight(o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
ret = o2hb_region_pin(NULL);
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index baa2b9ef7ee..2260fb9e650 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -199,7 +199,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
#define mlog_errno(st) do { \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
- _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC) \
+ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
+ _st != -EDQUOT) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
} while (0)
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index bb240647ca5..441c84e169e 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -29,7 +29,6 @@
#include "heartbeat.h"
#include "masklog.h"
#include "sys.h"
-#include "ver.h"
/* for now we operate under the assertion that there can be only one
* cluster active at a time. Changing this will require trickling
@@ -945,8 +944,6 @@ static int __init init_o2nm(void)
{
int ret = -1;
- cluster_print_version();
-
ret = o2hb_init();
if (ret)
goto out;
@@ -984,6 +981,7 @@ out:
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster management");
module_init(init_o2nm)
module_exit(exit_o2nm)
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index a4b07730b2e..b7f57271d49 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION);
}
static struct kobj_attribute attr_version =
- __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL);
+ __ATTR(interface_revision, S_IRUGO, version_show, NULL);
static struct attribute *o2cb_attrs[] = {
&attr_version.attr,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2cd2406b414..681691bc233 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@ static struct rb_root o2net_handler_tree = RB_ROOT;
static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
/* XXX someday we'll need better accounting */
-static struct socket *o2net_listen_sock = NULL;
+static struct socket *o2net_listen_sock;
/*
* listen work is only queued by the listening socket callbacks on the
@@ -137,7 +137,7 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
static void o2net_sc_connect_completed(struct work_struct *work);
static void o2net_rx_until_empty(struct work_struct *work);
static void o2net_shutdown_sc(struct work_struct *work);
-static void o2net_listen_data_ready(struct sock *sk, int bytes);
+static void o2net_listen_data_ready(struct sock *sk);
static void o2net_sc_send_keep_req(struct work_struct *work);
static void o2net_idle_timer(unsigned long data);
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
@@ -262,17 +262,17 @@ static void o2net_update_recv_stats(struct o2net_sock_container *sc)
#endif /* CONFIG_OCFS2_FS_STATS */
-static inline int o2net_reconnect_delay(void)
+static inline unsigned int o2net_reconnect_delay(void)
{
return o2nm_single_cluster->cl_reconnect_delay_ms;
}
-static inline int o2net_keepalive_delay(void)
+static inline unsigned int o2net_keepalive_delay(void)
{
return o2nm_single_cluster->cl_keepalive_delay_ms;
}
-static inline int o2net_idle_timeout(void)
+static inline unsigned int o2net_idle_timeout(void)
{
return o2nm_single_cluster->cl_idle_timeout_ms;
}
@@ -597,9 +597,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
}
/* see o2net_register_callbacks() */
-static void o2net_data_ready(struct sock *sk, int bytes)
+static void o2net_data_ready(struct sock *sk)
{
- void (*ready)(struct sock *sk, int bytes);
+ void (*ready)(struct sock *sk);
read_lock(&sk->sk_callback_lock);
if (sk->sk_user_data) {
@@ -613,7 +613,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
}
read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ ready(sk);
}
/* see o2net_register_callbacks() */
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
{
- int ret;
- mm_segment_t oldfs;
- struct kvec vec = {
- .iov_len = len,
- .iov_base = data,
- };
- struct msghdr msg = {
- .msg_iovlen = 1,
- .msg_iov = (struct iovec *)&vec,
- .msg_flags = MSG_DONTWAIT,
- };
-
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
- set_fs(oldfs);
-
- return ret;
+ struct kvec vec = { .iov_len = len, .iov_base = data, };
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
+ return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
}
static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
size_t veclen, size_t total)
{
int ret;
- mm_segment_t oldfs;
- struct msghdr msg = {
- .msg_iov = (struct iovec *)vec,
- .msg_iovlen = veclen,
- };
+ struct msghdr msg;
if (sock == NULL) {
ret = -EINVAL;
goto out;
}
- oldfs = get_fs();
- set_fs(get_ds());
- ret = sock_sendmsg(sock, &msg, total);
- set_fs(oldfs);
- if (ret != total) {
- mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret,
- total);
- if (ret >= 0)
- ret = -EPIPE; /* should be smarter, I bet */
- goto out;
- }
-
- ret = 0;
+ ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
+ if (likely(ret == total))
+ return 0;
+ mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
+ if (ret >= 0)
+ ret = -EPIPE; /* should be smarter, I bet */
out:
- if (ret < 0)
- mlog(0, "returning error: %d\n", ret);
+ mlog(0, "returning error: %d\n", ret);
return ret;
}
@@ -1826,7 +1799,7 @@ int o2net_register_hb_callbacks(void)
/* ------------------------------------------------------------ */
-static int o2net_accept_one(struct socket *sock)
+static int o2net_accept_one(struct socket *sock, int *more)
{
int ret, slen;
struct sockaddr_in sin;
@@ -1837,6 +1810,7 @@ static int o2net_accept_one(struct socket *sock)
struct o2net_node *nn;
BUG_ON(sock == NULL);
+ *more = 0;
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
sock->sk->sk_protocol, &new_sock);
if (ret)
@@ -1848,6 +1822,7 @@ static int o2net_accept_one(struct socket *sock)
if (ret < 0)
goto out;
+ *more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
ret = o2net_set_nodelay(new_sock);
@@ -1946,16 +1921,41 @@ out:
return ret;
}
+/*
+ * This function is invoked in response to one or more
+ * pending accepts at softIRQ level. We must drain the
+ * entire que before returning.
+ */
+
static void o2net_accept_many(struct work_struct *work)
{
struct socket *sock = o2net_listen_sock;
- while (o2net_accept_one(sock) == 0)
+ int more;
+ int err;
+
+ /*
+ * It is critical to note that due to interrupt moderation
+ * at the network driver level, we can't assume to get a
+ * softIRQ for every single conn since tcp SYN packets
+ * can arrive back-to-back, and therefore many pending
+ * accepts may result in just 1 softIRQ. If we terminate
+ * the o2net_accept_one() loop upon seeing an err, what happens
+ * to the rest of the conns in the queue? If no new SYN
+ * arrives for hours, no softIRQ will be delivered,
+ * and the connections will just sit in the queue.
+ */
+
+ for (;;) {
+ err = o2net_accept_one(sock, &more);
+ if (!more)
+ break;
cond_resched();
+ }
}
-static void o2net_listen_data_ready(struct sock *sk, int bytes)
+static void o2net_listen_data_ready(struct sock *sk)
{
- void (*ready)(struct sock *sk, int bytes);
+ void (*ready)(struct sock *sk);
read_lock(&sk->sk_callback_lock);
ready = sk->sk_user_data;
@@ -1964,18 +1964,29 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
goto out;
}
- /* ->sk_data_ready is also called for a newly established child socket
- * before it has been accepted and the acceptor has set up their
- * data_ready.. we only want to queue listen work for our listening
- * socket */
+ /* This callback may called twice when a new connection
+ * is being established as a child socket inherits everything
+ * from a parent LISTEN socket, including the data_ready cb of
+ * the parent. This leads to a hazard. In o2net_accept_one()
+ * we are still initializing the child socket but have not
+ * changed the inherited data_ready callback yet when
+ * data starts arriving.
+ * We avoid this hazard by checking the state.
+ * For the listening socket, the state will be TCP_LISTEN; for the new
+ * socket, will be TCP_ESTABLISHED. Also, in this case,
+ * sk->sk_user_data is not a valid function pointer.
+ */
+
if (sk->sk_state == TCP_LISTEN) {
- mlog(ML_TCP, "bytes: %d\n", bytes);
queue_work(o2net_wq, &o2net_listen_work);
+ } else {
+ ready = NULL;
}
out:
read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ if (ready != NULL)
+ ready(sk);
}
static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4cbcb65784a..dc024367110 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -165,7 +165,7 @@ struct o2net_sock_container {
/* original handlers for the sockets */
void (*sc_state_change)(struct sock *sk);
- void (*sc_data_ready)(struct sock *sk, int bytes);
+ void (*sc_data_ready)(struct sock *sk);
u32 sc_msg_key;
u16 sc_msg_type;
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c
deleted file mode 100644
index a56eee6abad..00000000000
--- a/fs/ocfs2/cluster/ver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define CLUSTER_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
-
-void cluster_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(CLUSTER_BUILD_VERSION);
diff --git a/fs/ocfs2/cluster/ver.h b/fs/ocfs2/cluster/ver.h
deleted file mode 100644
index 32554c3382c..00000000000
--- a/fs/ocfs2/cluster/ver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef O2CLUSTER_VER_H
-#define O2CLUSTER_VER_H
-
-void cluster_print_version(void);
-
-#endif /* O2CLUSTER_VER_H */
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 0d3a97d2d5f..e2e05a106be 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -37,7 +37,6 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
-#include "super.h"
#include "ocfs2_trace.h"
void ocfs2_dentry_attach_gen(struct dentry *dentry)
@@ -346,52 +345,6 @@ out_attach:
return ret;
}
-DEFINE_SPINLOCK(dentry_list_lock);
-
-/* We limit the number of dentry locks to drop in one go. We have
- * this limit so that we don't starve other users of ocfs2_wq. */
-#define DL_INODE_DROP_COUNT 64
-
-/* Drop inode references from dentry locks */
-static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
-{
- struct ocfs2_dentry_lock *dl;
-
- spin_lock(&dentry_list_lock);
- while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
- dl = osb->dentry_lock_list;
- osb->dentry_lock_list = dl->dl_next;
- spin_unlock(&dentry_list_lock);
- iput(dl->dl_inode);
- kfree(dl);
- spin_lock(&dentry_list_lock);
- }
- spin_unlock(&dentry_list_lock);
-}
-
-void ocfs2_drop_dl_inodes(struct work_struct *work)
-{
- struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
- dentry_lock_work);
-
- __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
- /*
- * Don't queue dropping if umount is in progress. We flush the
- * list in ocfs2_dismount_volume
- */
- spin_lock(&dentry_list_lock);
- if (osb->dentry_lock_list &&
- !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
- queue_work(ocfs2_wq, &osb->dentry_lock_work);
- spin_unlock(&dentry_list_lock);
-}
-
-/* Flush the whole work queue */
-void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
-{
- __ocfs2_drop_dl_inodes(osb, -1);
-}
-
/*
* ocfs2_dentry_iput() and friends.
*
@@ -416,24 +369,16 @@ void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
+ iput(dl->dl_inode);
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
-
- /* We leave dropping of inode reference to ocfs2_wq as that can
- * possibly lead to inode deletion which gets tricky */
- spin_lock(&dentry_list_lock);
- if (!osb->dentry_lock_list &&
- !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
- queue_work(ocfs2_wq, &osb->dentry_lock_work);
- dl->dl_next = osb->dentry_lock_list;
- osb->dentry_lock_list = dl;
- spin_unlock(&dentry_list_lock);
+ kfree(dl);
}
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- int unlock;
+ int unlock = 0;
BUG_ON(dl->dl_count == 0);
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index b79eff70995..55f58892b15 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,13 +29,8 @@
extern const struct dentry_operations ocfs2_dentry_ops;
struct ocfs2_dentry_lock {
- /* Use count of dentry lock */
unsigned int dl_count;
- union {
- /* Linked list of dentry locks to release */
- struct ocfs2_dentry_lock *dl_next;
- u64 dl_parent_blkno;
- };
+ u64 dl_parent_blkno;
/*
* The ocfs2_dentry_lock keeps an inode reference until
@@ -49,14 +44,9 @@ struct ocfs2_dentry_lock {
int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
u64 parent_blkno);
-extern spinlock_t dentry_list_lock;
-
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
-void ocfs2_drop_dl_inodes(struct work_struct *work);
-void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
-
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 30544ce8e9f..0717662b4ae 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2349,7 +2349,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
dx_root_bh = sb_getblk(osb->sb, dr_blkno);
if (dx_root_bh == NULL) {
- ret = -EIO;
+ ret = -ENOMEM;
goto out;
}
ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
@@ -2422,7 +2422,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
for (i = 0; i < num_dx_leaves; i++) {
bh = sb_getblk(osb->sb, start_blk + i);
if (bh == NULL) {
- ret = -EIO;
+ ret = -ENOMEM;
goto out;
}
dx_leaves[i] = bh;
@@ -2929,7 +2929,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
dirdata_bh = sb_getblk(sb, blkno);
if (!dirdata_bh) {
- ret = -EIO;
+ ret = -ENOMEM;
mlog_errno(ret);
goto out_commit;
}
@@ -2957,6 +2957,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_init_dir_trailer(dir, dirdata_bh, i);
}
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, dirdata_bh);
if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
@@ -3005,6 +3006,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
di->i_size = cpu_to_le64(sb->s_blocksize);
di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
/*
* This should never fail as our extent list is empty and all
@@ -3159,7 +3161,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
*new_bh = sb_getblk(sb, p_blkno);
if (!*new_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -3284,7 +3286,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
if (ocfs2_dir_resv_allowed(osb))
data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
- credits = ocfs2_calc_extend_credits(sb, el, 1);
+ credits = ocfs2_calc_extend_credits(sb, el);
} else {
spin_unlock(&OCFS2_I(dir)->ip_lock);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
@@ -3338,6 +3340,7 @@ do_extend:
} else {
de->rec_len = cpu_to_le16(sb->s_blocksize);
}
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, new_bh);
dir_i_size += dir->i_sb->s_blocksize;
@@ -3716,7 +3719,7 @@ static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
{
int credits = ocfs2_clusters_to_blocks(osb->sb, 2);
- credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1);
+ credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
credits += ocfs2_quota_trans_credits(osb->sb);
return credits;
}
@@ -3896,6 +3899,7 @@ out_commit:
dquot_free_space_nodirty(dir,
ocfs2_clusters_to_bytes(dir->i_sb, 1));
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_commit_trans(osb, handle);
out:
@@ -4134,6 +4138,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
mlog_errno(ret);
did_quota = 0;
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, dx_root_bh);
out_commit:
@@ -4401,6 +4406,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
spin_unlock(&OCFS2_I(dir)->ip_lock);
di->i_dx_root = cpu_to_le64(0ULL);
+ ocfs2_update_inode_fsync_trans(handle, dir, 1);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index c8a044efbb1..bd1aab1f49a 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
- dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
+ dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index e0517762fcc..fae17c640df 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -108,7 +108,6 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
struct dlm_recovery_ctxt
{
struct list_head resources;
- struct list_head received;
struct list_head node_data;
u8 new_master;
u8 dead_node;
@@ -332,6 +331,7 @@ struct dlm_lock_resource
u16 state;
char lvb[DLM_LVB_LEN];
unsigned int inflight_locks;
+ unsigned int inflight_assert_workers;
unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
};
@@ -911,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res);
+
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index e33cd7a3c58..18f13c2e4a1 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -338,7 +338,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
#ifdef CONFIG_DEBUG_FS
-static struct dentry *dlm_debugfs_root = NULL;
+static struct dentry *dlm_debugfs_root;
#define DLM_DEBUGFS_DIR "o2dlm"
#define DLM_DEBUGFS_DLM_STATE "dlm_state"
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8b3382abf84..39efc5057a3 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -43,8 +43,6 @@
#include "dlmdomain.h"
#include "dlmdebug.h"
-#include "dlmver.h"
-
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
#include "cluster/masklog.h"
@@ -961,6 +959,14 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
* domain. Set him in the map and clean up our
* leftover join state. */
BUG_ON(dlm->joining_node != assert->node_idx);
+
+ if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
+ mlog(0, "dlm recovery is ongoing, disallow join\n");
+ spin_unlock(&dlm->spinlock);
+ spin_unlock(&dlm_domain_lock);
+ return -EAGAIN;
+ }
+
set_bit(assert->node_idx, dlm->domain_map);
clear_bit(assert->node_idx, dlm->exit_domain_map);
__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -1125,7 +1131,6 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
struct dlm_ctxt *dlm = NULL;
char *local = NULL;
int status = 0;
- int locked = 0;
qr = (struct dlm_query_region *) msg->buf;
@@ -1134,10 +1139,8 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
/* buffer used in dlm_mast_regions() */
local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
- if (!local) {
- status = -ENOMEM;
- goto bail;
- }
+ if (!local)
+ return -ENOMEM;
status = -EINVAL;
@@ -1146,16 +1149,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
if (!dlm) {
mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
"before join domain\n", qr->qr_node, qr->qr_domain);
- goto bail;
+ goto out_domain_lock;
}
spin_lock(&dlm->spinlock);
- locked = 1;
if (dlm->joining_node != qr->qr_node) {
mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
"but joining node is %d\n", qr->qr_node, qr->qr_domain,
dlm->joining_node);
- goto bail;
+ goto out_dlm_lock;
}
/* Support for global heartbeat was added in 1.1 */
@@ -1165,14 +1167,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
"but active dlm protocol is %d.%d\n", qr->qr_node,
qr->qr_domain, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
- goto bail;
+ goto out_dlm_lock;
}
status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
-bail:
- if (locked)
- spin_unlock(&dlm->spinlock);
+out_dlm_lock:
+ spin_unlock(&dlm->spinlock);
+
+out_domain_lock:
spin_unlock(&dlm_domain_lock);
kfree(local);
@@ -1522,6 +1525,7 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
unsigned int node)
{
int status;
+ int ret;
struct dlm_assert_joined assert_msg;
mlog(0, "Sending join assert to node %u\n", node);
@@ -1533,11 +1537,13 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
&assert_msg, sizeof(assert_msg), node,
- NULL);
+ &ret);
if (status < 0)
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
node);
+ else
+ status = ret;
return status;
}
@@ -1879,19 +1885,19 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- status = dlm_debug_init(dlm);
+ status = dlm_launch_thread(dlm);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- status = dlm_launch_thread(dlm);
+ status = dlm_launch_recovery_thread(dlm);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- status = dlm_launch_recovery_thread(dlm);
+ status = dlm_debug_init(dlm);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2028,7 +2034,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
INIT_LIST_HEAD(&dlm->list);
INIT_LIST_HEAD(&dlm->dirty_list);
INIT_LIST_HEAD(&dlm->reco.resources);
- INIT_LIST_HEAD(&dlm->reco.received);
INIT_LIST_HEAD(&dlm->reco.node_data);
INIT_LIST_HEAD(&dlm->purge_list);
INIT_LIST_HEAD(&dlm->dlm_domain_handlers);
@@ -2328,8 +2333,6 @@ static int __init dlm_init(void)
{
int status;
- dlm_print_version();
-
status = dlm_init_mle_cache();
if (status) {
mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
@@ -2379,6 +2382,7 @@ static void __exit dlm_exit (void)
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 Distributed Lock Management");
module_init(dlm_init);
module_exit(dlm_exit);
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5d32f7511f7..66c2a491f68 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -52,7 +52,7 @@
#define MLOG_MASK_PREFIX ML_DLM
#include "cluster/masklog.h"
-static struct kmem_cache *dlm_lock_cache = NULL;
+static struct kmem_cache *dlm_lock_cache;
static DEFINE_SPINLOCK(dlm_cookie_lock);
static u64 dlm_next_cookie = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index cf0f103963b..82abf0cc9a1 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -82,9 +82,9 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
return 1;
}
-static struct kmem_cache *dlm_lockres_cache = NULL;
-static struct kmem_cache *dlm_lockname_cache = NULL;
-static struct kmem_cache *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_lockres_cache;
+static struct kmem_cache *dlm_lockname_cache;
+static struct kmem_cache *dlm_mle_cache;
static void dlm_mle_release(struct kref *kref);
static void dlm_init_mle(struct dlm_master_list_entry *mle,
@@ -472,11 +472,15 @@ bail:
void dlm_destroy_master_caches(void)
{
- if (dlm_lockname_cache)
+ if (dlm_lockname_cache) {
kmem_cache_destroy(dlm_lockname_cache);
+ dlm_lockname_cache = NULL;
+ }
- if (dlm_lockres_cache)
+ if (dlm_lockres_cache) {
kmem_cache_destroy(dlm_lockres_cache);
+ dlm_lockres_cache = NULL;
+ }
}
static void dlm_lockres_release(struct kref *kref)
@@ -577,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
atomic_set(&res->asts_reserved, 0);
res->migration_pending = 0;
res->inflight_locks = 0;
+ res->inflight_assert_workers = 0;
res->dlm = dlm;
@@ -679,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
wake_up(&res->wq);
}
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ res->inflight_assert_workers++;
+ mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ res->inflight_assert_workers);
+}
+
+static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ spin_lock(&res->spinlock);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
+}
+
+static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ BUG_ON(res->inflight_assert_workers == 0);
+ res->inflight_assert_workers--;
+ mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ res->inflight_assert_workers);
+}
+
+static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ spin_lock(&res->spinlock);
+ __dlm_lockres_drop_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
+}
+
/*
* lookup a lock resource by name.
* may already exist in the hashtable.
@@ -1599,7 +1641,8 @@ send_response:
mlog(ML_ERROR, "failed to dispatch assert master work\n");
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
- }
+ } else
+ dlm_lockres_grab_inflight_worker(dlm, res);
} else {
if (res)
dlm_lockres_put(res);
@@ -1885,8 +1928,10 @@ ok:
* up nodes that this node contacted */
while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
nn+1)) < O2NM_MAX_NODES) {
- if (nn != dlm->node_num && nn != assert->node_idx)
+ if (nn != dlm->node_num && nn != assert->node_idx) {
master_request = 1;
+ break;
+ }
}
}
mle->master = assert->node_idx;
@@ -2112,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
dlm_lockres_release_ast(dlm, res);
put:
+ dlm_lockres_drop_inflight_worker(dlm, res);
+
dlm_lockres_put(res);
mlog(0, "finished with dlm_assert_master_worker\n");
@@ -2354,6 +2401,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
+ /* delay migration when the lockres is in MIGRATING state */
+ if (res->state & DLM_LOCK_RES_MIGRATING)
+ return 0;
+
if (res->owner != dlm->node_num)
return 0;
@@ -3078,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* remove it so that only one mle will be found */
__dlm_unlink_mle(dlm, tmp);
__dlm_mle_detach_hb_events(dlm, tmp);
- ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
- mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
- "telling master to get ref for cleared out mle "
- "during migration\n", dlm->name, namelen, name,
- master, new_master);
+ if (tmp->type == DLM_MLE_MASTER) {
+ ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
+ mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
+ "telling master to get ref "
+ "for cleared out mle during "
+ "migration\n", dlm->name,
+ namelen, name, master,
+ new_master);
+ }
}
spin_unlock(&tmp->spinlock);
}
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 0b5adca1b17..45067faf569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -537,7 +537,10 @@ master_here:
/* success! see if any other nodes need recovery */
mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
dlm->name, dlm->reco.dead_node, dlm->node_num);
- dlm_reset_recovery(dlm);
+ spin_lock(&dlm->spinlock);
+ __dlm_reset_recovery(dlm);
+ dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
}
dlm_end_recovery(dlm);
@@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
if (all_nodes_done) {
int ret;
+ /* Set this flag on recovery master to avoid
+ * a new recovery for another dead node start
+ * before the recovery is not done. That may
+ * cause recovery hung.*/
+ spin_lock(&dlm->spinlock);
+ dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
+
/* all nodes are now in DLM_RECO_NODE_DATA_DONE state
* just send a finalize message to everyone and
* clean up */
@@ -1697,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
mlog_errno(-ENOMEM);
/* retry!? */
BUG();
- }
+ } else
+ __dlm_lockres_grab_inflight_worker(dlm, res);
} else /* put.. incase we are not the master */
dlm_lockres_put(res);
spin_unlock(&res->spinlock);
@@ -1750,13 +1762,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
struct dlm_migratable_lockres *mres)
{
struct dlm_migratable_lock *ml;
- struct list_head *queue;
+ struct list_head *queue, *iter;
struct list_head *tmpq = NULL;
struct dlm_lock *newlock = NULL;
struct dlm_lockstatus *lksb = NULL;
int ret = 0;
int i, j, bad;
- struct dlm_lock *lock = NULL;
+ struct dlm_lock *lock;
u8 from = O2NM_MAX_NODES;
unsigned int added = 0;
__be64 c;
@@ -1791,14 +1803,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* MIGRATION ONLY! */
BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
+ lock = NULL;
spin_lock(&res->spinlock);
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
tmpq = dlm_list_idx_to_ptr(res, j);
- list_for_each_entry(lock, tmpq, list) {
- if (lock->ml.cookie != ml->cookie)
- lock = NULL;
- else
+ list_for_each(iter, tmpq) {
+ lock = list_entry(iter,
+ struct dlm_lock, list);
+ if (lock->ml.cookie == ml->cookie)
break;
+ lock = NULL;
}
if (lock)
break;
@@ -1886,6 +1900,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
if (ml->type == LKM_NLMODE)
goto skip_lvb;
+ /*
+ * If the lock is in the blocked list it can't have a valid lvb,
+ * so skip it
+ */
+ if (ml->list == DLM_BLOCKED_LIST)
+ goto skip_lvb;
+
if (!dlm_lvb_is_empty(mres->lvb)) {
if (lksb->flags & DLM_LKSB_PUT_LVB) {
/* other node was trying to update
@@ -1966,7 +1987,15 @@ skip_lvb:
}
if (!bad) {
dlm_lock_get(newlock);
- list_add_tail(&newlock->list, queue);
+ if (mres->flags & DLM_MRES_RECOVERY &&
+ ml->list == DLM_CONVERTING_LIST &&
+ newlock->ml.type >
+ newlock->ml.convert_type) {
+ /* newlock is doing downconvert, add it to the
+ * head of converting list */
+ list_add(&newlock->list, queue);
+ } else
+ list_add_tail(&newlock->list, queue);
mlog(0, "%s:%.*s: added lock for node %u, "
"setting refmap bit\n", dlm->name,
res->lockname.len, res->lockname.name, ml->node);
@@ -2875,8 +2904,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
BUG();
}
dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ __dlm_reset_recovery(dlm);
spin_unlock(&dlm->spinlock);
- dlm_reset_recovery(dlm);
dlm_kick_recovery_thread(dlm);
break;
default:
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 9db869de829..69aac6f088a 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* refs on it. */
unused = __dlm_lockres_unused(lockres);
if (!unused ||
- (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+ (lockres->state & DLM_LOCK_RES_MIGRATING) ||
+ (lockres->inflight_assert_workers != 0)) {
mlog(0, "%s: res %.*s is in use or being remastered, "
- "used %d, state %d\n", dlm->name,
- lockres->lockname.len, lockres->lockname.name,
- !unused, lockres->state);
- list_move_tail(&dlm->purge_list, &lockres->purge);
+ "used %d, state %d, assert master workers %u\n",
+ dlm->name, lockres->lockname.len,
+ lockres->lockname.name,
+ !unused, lockres->state,
+ lockres->inflight_assert_workers);
+ list_move_tail(&lockres->purge, &dlm->purge_list);
spin_unlock(&lockres->spinlock);
continue;
}
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 5698b52cf5c..2e3c9dbab68 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
} else if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
- status == DLM_FORWARD) {
+ status == DLM_FORWARD ||
+ status == DLM_NOLOCKMGR
+ ) {
/* must clear the actions because this unlock
* is about to be retried. cannot free or do
* any list manipulation. */
@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
res->lockname.name,
status==DLM_RECOVERING?"recovering":
(status==DLM_MIGRATING?"migrating":
- "forward"));
+ (status == DLM_FORWARD ? "forward" :
+ "nolockmanager")));
actions = 0;
}
if (flags & LKM_CANCEL)
@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
* updated state to the recovery master. this thread
* just needs to finish out the operation and call
* the unlockast. */
- ret = DLM_NORMAL;
+ if (dlm_is_node_dead(dlm, owner))
+ ret = DLM_NORMAL;
+ else
+ ret = DLM_NOLOCKMGR;
} else {
/* something bad. this will BUG in ocfs2 */
ret = dlm_err_to_dlm_status(tmpret);
@@ -638,7 +644,9 @@ retry:
if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
- status == DLM_FORWARD) {
+ status == DLM_FORWARD ||
+ status == DLM_NOLOCKMGR) {
+
/* We want to go away for a tiny bit to allow recovery
* / migration to complete on this resource. I don't
* know of any wait queue we could sleep on as this
@@ -650,7 +658,7 @@ retry:
msleep(50);
mlog(0, "retrying unlock due to pending recovery/"
- "migration/in-progress\n");
+ "migration/in-progress/reconnect\n");
goto retry;
}
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c
deleted file mode 100644
index dfc0da4d158..00000000000
--- a/fs/ocfs2/dlm/dlmver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
-
-void dlm_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlm/dlmver.h b/fs/ocfs2/dlm/dlmver.h
deleted file mode 100644
index f674aee77a1..00000000000
--- a/fs/ocfs2/dlm/dlmver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLM_VER_H
-#define DLM_VER_H
-
-void dlm_print_version(void);
-
-#endif /* DLM_VER_H */
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index f14be89a670..eed3db8c5b4 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
-ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index efa2b3d339e..09b7d9dac71 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -49,7 +49,6 @@
#include "stackglue.h"
#include "userdlm.h"
-#include "dlmfsver.h"
#define MLOG_MASK_PREFIX ML_DLMFS
#include "cluster/masklog.h"
@@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void)
int status;
int cleanup_inode = 0, cleanup_worker = 0;
- dlmfs_print_version();
-
status = bdi_init(&dlmfs_backing_dev_info);
if (status)
return status;
@@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void)
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
module_init(init_dlmfs_fs)
module_exit(exit_dlmfs_fs)
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c
deleted file mode 100644
index a733b3321f8..00000000000
--- a/fs/ocfs2/dlmfs/dlmfsver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmfsver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
-
-void dlmfs_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h
deleted file mode 100644
index f35eadbed25..00000000000
--- a/fs/ocfs2/dlmfs/dlmfsver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLMFS_VER_H
-#define DLMFS_VER_H
-
-void dlmfs_print_version(void);
-
-#endif /* DLMFS_VER_H */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3a44a648dae..52cfe99ae05 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
wait_for_completion(&mw->mw_complete);
/* Re-arm the completion in case we want to wait on it again */
- INIT_COMPLETION(mw->mw_complete);
+ reinit_completion(&mw->mw_complete);
return mw->mw_status;
}
@@ -1355,7 +1355,7 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
else
ret = mw->mw_status;
/* Re-arm the completion in case we want to wait on it again */
- INIT_COMPLETION(mw->mw_complete);
+ reinit_completion(&mw->mw_complete);
return ret;
}
@@ -2544,11 +2544,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
* refreshed, so we do it here. Of course, making sense of
* everything is up to the caller :) */
status = ocfs2_should_refresh_lock_res(lockres);
- if (status < 0) {
- ocfs2_cluster_unlock(osb, lockres, level);
- mlog_errno(status);
- goto bail;
- }
if (status) {
status = ocfs2_refresh_slot_info(osb);
@@ -2996,6 +2991,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
/* for now, uuid == domain */
status = ocfs2_cluster_connect(osb->osb_cluster_stack,
+ osb->osb_cluster_name,
+ strlen(osb->osb_cluster_name),
osb->uuid_str,
strlen(osb->uuid_str),
&lproto, ocfs2_do_node_down, osb,
@@ -3005,7 +3002,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto bail;
}
- status = ocfs2_cluster_this_node(&osb->node_num);
+ status = ocfs2_cluster_this_node(conn, &osb->node_num);
if (status < 0) {
mlog_errno(status);
mlog(ML_ERROR,
@@ -3142,22 +3139,60 @@ out:
return 0;
}
+static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres);
+
/* Mark the lockres as being dropped. It will no longer be
* queued if blocking, but we still may have to wait on it
* being dequeued from the downconvert thread before we can consider
* it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres)
{
int status;
struct ocfs2_mask_waiter mw;
- unsigned long flags;
+ unsigned long flags, flags2;
ocfs2_init_mask_waiter(&mw);
spin_lock_irqsave(&lockres->l_lock, flags);
lockres->l_flags |= OCFS2_LOCK_FREEING;
+ if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
+ /*
+ * We know the downconvert is queued but not in progress
+ * because we are the downconvert thread and processing
+ * different lock. So we can just remove the lock from the
+ * queue. This is not only an optimization but also a way
+ * to avoid the following deadlock:
+ * ocfs2_dentry_post_unlock()
+ * ocfs2_dentry_lock_put()
+ * ocfs2_drop_dentry_lock()
+ * iput()
+ * ocfs2_evict_inode()
+ * ocfs2_clear_inode()
+ * ocfs2_mark_lockres_freeing()
+ * ... blocks waiting for OCFS2_LOCK_QUEUED
+ * since we are the downconvert thread which
+ * should clear the flag.
+ */
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
+ spin_lock_irqsave(&osb->dc_task_lock, flags2);
+ list_del_init(&lockres->l_blocked_list);
+ osb->blocked_lock_count--;
+ spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
+ /*
+ * Warn if we recurse into another post_unlock call. Strictly
+ * speaking it isn't a problem but we need to be careful if
+ * that happens (stack overflow, deadlocks, ...) so warn if
+ * ocfs2 grows a path for which this can happen.
+ */
+ WARN_ON_ONCE(lockres->l_ops->post_unlock);
+ /* Since the lock is freeing we don't do much in the fn below */
+ ocfs2_process_blocked_lock(osb, lockres);
+ return;
+ }
while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3178,7 +3213,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
{
int ret;
- ocfs2_mark_lockres_freeing(lockres);
+ ocfs2_mark_lockres_freeing(osb, lockres);
ret = ocfs2_drop_lock(osb, lockres);
if (ret)
mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d596d8c4a4..d293a22c32c 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d71903c6068..2930e231f3f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -175,9 +175,13 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
int datasync)
{
int err = 0;
- journal_t *journal;
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ journal_t *journal = osb->journal->j_journal;
+ int ret;
+ tid_t commit_tid;
+ bool needs_barrier = false;
trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
OCFS2_I(inode)->ip_blkno,
@@ -185,33 +189,26 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
file->f_path.dentry->d_name.name,
(unsigned long long)datasync);
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
- /*
- * Probably don't need the i_mutex at all in here, just putting it here
- * to be consistent with how fsync used to be called, someone more
- * familiar with the fs could possibly remove it.
- */
- mutex_lock(&inode->i_mutex);
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
- /*
- * We still have to flush drive's caches to get data to the
- * platter
- */
- if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- goto bail;
+ commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
+ if (journal->j_flags & JBD2_BARRIER &&
+ !jbd2_trans_will_send_data_barrier(journal, commit_tid))
+ needs_barrier = true;
+ err = jbd2_complete_transaction(journal, commit_tid);
+ if (needs_barrier) {
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ if (!err)
+ err = ret;
}
- journal = osb->journal->j_journal;
- err = jbd2_journal_force_commit(journal);
-
-bail:
if (err)
mlog_errno(err);
- mutex_unlock(&inode->i_mutex);
return (err < 0) ? -EIO : 0;
}
@@ -289,6 +286,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
inode->i_atime = CURRENT_TIME;
di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, bh);
out_commit:
@@ -338,6 +336,7 @@ int ocfs2_simple_size_update(struct inode *inode,
if (ret < 0)
mlog_errno(ret);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_commit_trans(osb, handle);
out:
return ret;
@@ -432,6 +431,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
di->i_size = cpu_to_le64(new_i_size);
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
@@ -474,11 +474,6 @@ static int ocfs2_truncate_file(struct inode *inode,
goto bail;
}
- /* lets handle the simple truncate cases before doing any more
- * cluster locking. */
- if (new_i_size == le64_to_cpu(fe->i_size))
- goto bail;
-
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ocfs2_resv_discard(&osb->osb_la_resmap,
@@ -580,7 +575,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
int did_quota = 0;
/*
- * This function only exists for file systems which don't
+ * Unwritten extent only exists for file systems which
* support holes.
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
@@ -603,8 +598,7 @@ restart_all:
goto leave;
}
- credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
- clusters_to_add);
+ credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
@@ -653,7 +647,7 @@ restarted_transaction:
mlog_errno(status);
goto leave;
}
-
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, bh);
spin_lock(&OCFS2_I(inode)->ip_lock);
@@ -719,7 +713,8 @@ leave:
* While a write will already be ordering the data, a truncate will not.
* Thus, we need to explicitly order the zeroed pages.
*/
-static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
+ struct buffer_head *di_bh)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle = NULL;
@@ -736,8 +731,16 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
}
ret = ocfs2_jbd2_file_inode(handle, inode);
- if (ret < 0)
+ if (ret < 0) {
mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret)
+ mlog_errno(ret);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
out:
if (ret) {
@@ -752,7 +755,7 @@ out:
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
- u64 abs_to)
+ u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -760,6 +763,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
handle_t *handle = NULL;
int ret = 0;
unsigned zero_from, zero_to, block_start, block_end;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
BUG_ON(abs_from >= abs_to);
BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
@@ -802,7 +806,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
if (!handle) {
- handle = ocfs2_zero_start_ordered_transaction(inode);
+ handle = ocfs2_zero_start_ordered_transaction(inode,
+ di_bh);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
@@ -819,8 +824,23 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
ret = 0;
}
- if (handle)
+ if (handle) {
+ /*
+ * fs-writeback will release the dirty pages without page lock
+ * whose offset are over inode size, the release happens at
+ * block_write_full_page().
+ */
+ i_size_write(inode, abs_to);
+ inode->i_blocks = ocfs2_inode_sector_count(inode);
+ di->i_size = cpu_to_le64((u64)i_size_read(inode));
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ di->i_mtime_nsec = di->i_ctime_nsec;
+ ocfs2_journal_dirty(handle, di_bh);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+ }
out_unlock:
unlock_page(page);
@@ -916,7 +936,7 @@ out:
* has made sure that the entire range needs zeroing.
*/
static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
- u64 range_end)
+ u64 range_end, struct buffer_head *di_bh)
{
int rc = 0;
u64 next_pos;
@@ -932,7 +952,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
if (next_pos > range_end)
next_pos = range_end;
- rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
if (rc < 0) {
mlog_errno(rc);
break;
@@ -978,7 +998,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
range_end = zero_to_size;
ret = ocfs2_zero_extend_range(inode, range_start,
- range_end);
+ range_end, di_bh);
if (ret) {
mlog_errno(ret);
break;
@@ -1146,14 +1166,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
goto bail_unlock_rw;
}
- if (size_change && attr->ia_size != i_size_read(inode)) {
+ if (size_change) {
status = inode_newsize_ok(inode, attr->ia_size);
if (status)
goto bail_unlock;
inode_dio_wait(inode);
- if (i_size_read(inode) > attr->ia_size) {
+ if (i_size_read(inode) >= attr->ia_size) {
if (ocfs2_should_order_data(inode)) {
status = ocfs2_begin_ordered_truncate(inode,
attr->ia_size);
@@ -1237,7 +1257,7 @@ bail:
dqput(transfer_to[qtype]);
if (!status && attr->ia_valid & ATTR_MODE) {
- status = ocfs2_acl_chmod(inode);
+ status = posix_acl_chmod(inode, inode->i_mode);
if (status < 0)
mlog_errno(status);
}
@@ -1323,6 +1343,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
di = (struct ocfs2_dinode *) bh->b_data;
di->i_mode = cpu_to_le16(inode->i_mode);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, bh);
@@ -1555,6 +1576,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
if (ret)
mlog_errno(ret);
}
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_commit_trans(osb, handle);
out:
@@ -1870,7 +1892,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
}
size = sr->l_start + sr->l_len;
- if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
+ if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
+ cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
if (sr->l_len <= 0) {
ret = -EINVAL;
goto out_inode_unlock;
@@ -2039,13 +2062,6 @@ out:
return ret;
}
-static void ocfs2_aiodio_wait(struct inode *inode)
-{
- wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
-
- wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0));
-}
-
static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
{
int blockmask = inode->i_sb->s_blocksize - 1;
@@ -2217,16 +2233,13 @@ out:
return ret;
}
-static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
+static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
{
int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
int can_do_direct, has_refcount = 0;
ssize_t written = 0;
- size_t ocount; /* original count */
- size_t count; /* after file limit checks */
+ size_t count = iov_iter_count(from);
loff_t old_size, *ppos = &iocb->ki_pos;
u32 old_clusters;
struct file *file = iocb->ki_filp;
@@ -2240,7 +2253,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name,
- (unsigned int)nr_segs);
+ (unsigned int)from->nr_segs); /* GRRRRR */
if (iocb->ki_nbytes == 0)
return 0;
@@ -2323,10 +2336,8 @@ relock:
* Wait on previous unaligned aio to complete before
* proceeding.
*/
- ocfs2_aiodio_wait(inode);
-
- /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */
- atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio);
+ mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
+ /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
ocfs2_iocb_set_unaligned_aio(iocb);
}
@@ -2340,28 +2351,23 @@ relock:
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb, rw_level);
- ret = generic_segment_checks(iov, &nr_segs, &ocount,
- VERIFY_READ);
- if (ret)
- goto out_dio;
-
- count = ocount;
ret = generic_write_checks(file, ppos, &count,
S_ISBLK(inode->i_mode));
if (ret)
goto out_dio;
+ iov_iter_truncate(from, count);
if (direct_io) {
- written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
- ppos, count, ocount);
+ written = generic_file_direct_write(iocb, from, *ppos);
if (written < 0) {
ret = written;
goto out_dio;
}
} else {
current->backing_dev_info = file->f_mapping->backing_dev_info;
- written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
- ppos, count, 0);
+ written = generic_perform_write(file, from, *ppos);
+ if (likely(written >= 0))
+ iocb->ki_pos = *ppos + written;
current->backing_dev_info = NULL;
}
@@ -2371,8 +2377,8 @@ out_dio:
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
if (ret < 0)
written = ret;
@@ -2385,8 +2391,8 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
/*
@@ -2406,7 +2412,7 @@ out_dio:
if (unaligned_dio) {
ocfs2_iocb_clear_unaligned_aio(iocb);
- atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
+ mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}
out:
@@ -2424,84 +2430,6 @@ out_sems:
return ret;
}
-static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
- struct file *out,
- struct splice_desc *sd)
-{
- int ret;
-
- ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
- sd->total_len, 0, NULL, NULL);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
- }
-
- return splice_from_pipe_feed(pipe, sd, pipe_to_file);
-}
-
-static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
- struct file *out,
- loff_t *ppos,
- size_t len,
- unsigned int flags)
-{
- int ret;
- struct address_space *mapping = out->f_mapping;
- struct inode *inode = mapping->host;
- struct splice_desc sd = {
- .total_len = len,
- .flags = flags,
- .pos = *ppos,
- .u.file = out,
- };
-
-
- trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- out->f_path.dentry->d_name.len,
- out->f_path.dentry->d_name.name, len);
-
- pipe_lock(pipe);
-
- splice_from_pipe_begin(&sd);
- do {
- ret = splice_from_pipe_next(pipe, &sd);
- if (ret <= 0)
- break;
-
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
- ret = ocfs2_rw_lock(inode, 1);
- if (ret < 0)
- mlog_errno(ret);
- else {
- ret = ocfs2_splice_to_file(pipe, out, &sd);
- ocfs2_rw_unlock(inode, 1);
- }
- mutex_unlock(&inode->i_mutex);
- } while (ret > 0);
- splice_from_pipe_end(pipe, &sd);
-
- pipe_unlock(pipe);
-
- if (sd.num_spliced)
- ret = sd.num_spliced;
-
- if (ret > 0) {
- int err;
-
- err = generic_write_sync(out, *ppos, ret);
- if (err)
- ret = err;
- else
- *ppos += ret;
-
- balance_dirty_pages_ratelimited(mapping);
- }
-
- return ret;
-}
-
static ssize_t ocfs2_file_splice_read(struct file *in,
loff_t *ppos,
struct pipe_inode_info *pipe,
@@ -2517,7 +2445,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
in->f_path.dentry->d_name.name, len);
/*
- * See the comment in ocfs2_file_aio_read()
+ * See the comment in ocfs2_file_read_iter()
*/
ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
if (ret < 0) {
@@ -2532,10 +2460,8 @@ bail:
return ret;
}
-static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
+static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
+ struct iov_iter *to)
{
int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
struct file *filp = iocb->ki_filp;
@@ -2544,7 +2470,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
filp->f_path.dentry->d_name.len,
- filp->f_path.dentry->d_name.name, nr_segs);
+ filp->f_path.dentry->d_name.name,
+ to->nr_segs); /* GRRRRR */
if (!inode) {
@@ -2589,13 +2516,13 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
}
ocfs2_inode_unlock(inode, lock_level);
- ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
+ ret = generic_file_read_iter(iocb, to);
trace_generic_file_aio_read_ret(ret);
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
- /* see ocfs2_file_aio_write */
+ /* see ocfs2_file_write_iter */
if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
rw_level = -1;
have_alloc_sem = 0;
@@ -2623,7 +2550,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
case SEEK_SET:
break;
case SEEK_END:
- offset += inode->i_size;
+ /* SEEK_END requires the OCFS2 inode lock for the file
+ * because it references the file's size.
+ */
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ offset += i_size_read(inode);
+ ocfs2_inode_unlock(inode, 0);
break;
case SEEK_CUR:
if (offset == 0) {
@@ -2662,6 +2598,7 @@ const struct inode_operations ocfs2_file_iops = {
.removexattr = generic_removexattr,
.fiemap = ocfs2_fiemap,
.get_acl = ocfs2_iop_get_acl,
+ .set_acl = ocfs2_iop_set_acl,
};
const struct inode_operations ocfs2_special_file_iops = {
@@ -2669,6 +2606,7 @@ const struct inode_operations ocfs2_special_file_iops = {
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
.get_acl = ocfs2_iop_get_acl,
+ .set_acl = ocfs2_iop_set_acl,
};
/*
@@ -2677,14 +2615,14 @@ const struct inode_operations ocfs2_special_file_iops = {
*/
const struct file_operations ocfs2_fops = {
.llseek = ocfs2_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
.open = ocfs2_file_open,
- .aio_read = ocfs2_file_aio_read,
- .aio_write = ocfs2_file_aio_write,
+ .read_iter = ocfs2_file_read_iter,
+ .write_iter = ocfs2_file_write_iter,
.unlocked_ioctl = ocfs2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
@@ -2692,7 +2630,7 @@ const struct file_operations ocfs2_fops = {
.lock = ocfs2_lock,
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
- .splice_write = ocfs2_file_splice_write,
+ .splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
@@ -2725,21 +2663,21 @@ const struct file_operations ocfs2_dops = {
*/
const struct file_operations ocfs2_fops_no_plocks = {
.llseek = ocfs2_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
.open = ocfs2_file_open,
- .aio_read = ocfs2_file_aio_read,
- .aio_write = ocfs2_file_aio_write,
+ .read_iter = ocfs2_file_read_iter,
+ .write_iter = ocfs2_file_write_iter,
.unlocked_ioctl = ocfs2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
- .splice_write = ocfs2_file_splice_write,
+ .splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f87f9bd1edf..437de7f768c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
+ journal_t *journal = OCFS2_SB(sb)->journal->j_journal;
trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
sysfile_type);
@@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
goto bail;
}
+ /*
+ * Set transaction id's of transactions that have to be committed
+ * to finish f[data]sync. We set them to currently running transaction
+ * as we cannot be sure that the inode or some of its metadata isn't
+ * part of the transaction - the inode could have been reclaimed and
+ * now it is reread from disk.
+ */
+ if (journal) {
+ transaction_t *transaction;
+ tid_t tid;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+ read_lock(&journal->j_state_lock);
+ if (journal->j_running_transaction)
+ transaction = journal->j_running_transaction;
+ else
+ transaction = journal->j_committing_transaction;
+ if (transaction)
+ tid = transaction->t_tid;
+ else
+ tid = journal->j_commit_sequence;
+ read_unlock(&journal->j_state_lock);
+ oi->i_sync_tid = tid;
+ oi->i_datasync_tid = tid;
+ }
+
bail:
if (!IS_ERR(inode)) {
trace_ocfs2_iget_end(inode,
@@ -386,19 +413,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
u32 generation = 0;
status = -EINVAL;
- if (inode == NULL || inode->i_sb == NULL) {
- mlog(ML_ERROR, "bad inode\n");
- return status;
- }
sb = inode->i_sb;
osb = OCFS2_SB(sb);
- if (!args) {
- mlog(ML_ERROR, "bad inode args\n");
- make_bad_inode(inode);
- return status;
- }
-
/*
* To improve performance of cold-cache inode stats, we take
* the cluster lock here if possible.
@@ -814,11 +831,13 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
goto bail;
}
- /* If we're coming from downconvert_thread we can't go into our own
- * voting [hello, deadlock city!], so unforuntately we just
- * have to skip deleting this guy. That's OK though because
- * the node who's doing the actual deleting should handle it
- * anyway. */
+ /*
+ * If we're coming from downconvert_thread we can't go into our own
+ * voting [hello, deadlock city!] so we cannot delete the inode. But
+ * since we dropped last inode ref when downconverting dentry lock,
+ * we cannot have the file open and thus the node doing unlink will
+ * take care of deleting the inode.
+ */
if (current == osb->dc_task)
goto bail;
@@ -832,12 +851,6 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
goto bail_unlock;
}
- /* If we have allowd wipe of this inode for another node, it
- * will be marked here so we can safely skip it. Recovery will
- * cleanup any inodes we might inadvertently skip here. */
- if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
- goto bail_unlock;
-
ret = 1;
bail_unlock:
spin_unlock(&oi->ip_lock);
@@ -951,7 +964,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
if (sync_data)
filemap_write_and_wait(inode->i_mapping);
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
}
static void ocfs2_delete_inode(struct inode *inode)
@@ -970,8 +983,6 @@ static void ocfs2_delete_inode(struct inode *inode)
if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno)
goto bail;
- dquot_initialize(inode);
-
if (!ocfs2_inode_is_valid_to_delete(inode)) {
/* It's probably not necessary to truncate_inode_pages
* here but we do it for safety anyway (it will most
@@ -980,6 +991,8 @@ static void ocfs2_delete_inode(struct inode *inode)
goto bail;
}
+ dquot_initialize(inode);
+
/* We want to block signals in delete_inode as the lock and
* messaging paths may return us -ERESTARTSYS. Which would
* cause us to exit early, resulting in inodes being orphaned
@@ -1067,6 +1080,7 @@ static void ocfs2_clear_inode(struct inode *inode)
{
int status;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
clear_inode(inode);
trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
@@ -1083,9 +1097,9 @@ static void ocfs2_clear_inode(struct inode *inode)
/* Do these before all the other work so that we don't bounce
* the downconvert thread while waiting to destroy the locks. */
- ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
- ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
- ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
+ ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres);
+ ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
+ ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
&oi->ip_la_data_resv);
@@ -1167,7 +1181,7 @@ void ocfs2_evict_inode(struct inode *inode)
(OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
ocfs2_delete_inode(inode);
} else {
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
}
ocfs2_clear_inode(inode);
}
@@ -1270,6 +1284,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
ocfs2_journal_dirty(handle, bh);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
leave:
return status;
}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 621fc73bf23..a6c991c0fc9 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -44,7 +44,7 @@ struct ocfs2_inode_info
struct rw_semaphore ip_xattr_sem;
/* Number of outstanding AIO's which are not page aligned */
- atomic_t ip_unaligned_aio;
+ struct mutex ip_unaligned_aio;
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
@@ -73,6 +73,13 @@ struct ocfs2_inode_info
u32 ip_dir_lock_gen;
struct ocfs2_alloc_reservation ip_la_data_resv;
+
+ /*
+ * Transactions that contain inode's metadata needed to complete
+ * fsync and fdatasync, respectively.
+ */
+ tid_t i_sync_tid;
+ tid_t i_datasync_tid;
};
/*
@@ -84,8 +91,6 @@ struct ocfs2_inode_info
#define OCFS2_INODE_BITMAP 0x00000004
/* This inode has been wiped from disk */
#define OCFS2_INODE_DELETED 0x00000008
-/* Another node is deleting, so our delete is a nop */
-#define OCFS2_INODE_SKIP_DELETE 0x00000010
/* Has the inode been orphaned on another node?
*
* This hints to ocfs2_drop_inode that it should clear i_nlink before
@@ -100,11 +105,11 @@ struct ocfs2_inode_info
* rely on ocfs2_delete_inode to sort things out under the proper
* cluster locks.
*/
-#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
+#define OCFS2_INODE_MAYBE_ORPHANED 0x00000010
/* Does someone have the file open O_DIRECT */
-#define OCFS2_INODE_OPEN_DIRECT 0x00000040
+#define OCFS2_INODE_OPEN_DIRECT 0x00000020
/* Tell the inode wipe code it's not in orphan dir */
-#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080
+#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000040
static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
{
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index fa32ce9b455..6f66b3751ac 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/blkdev.h>
#include <linux/compat.h>
#include <cluster/masklog.h>
@@ -142,8 +143,8 @@ bail:
return status;
}
-int ocfs2_info_handle_blocksize(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_blocksize(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_blocksize oib;
@@ -166,8 +167,8 @@ bail:
return status;
}
-int ocfs2_info_handle_clustersize(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_clustersize(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_clustersize oic;
@@ -191,8 +192,8 @@ bail:
return status;
}
-int ocfs2_info_handle_maxslots(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_maxslots(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_maxslots oim;
@@ -216,8 +217,8 @@ bail:
return status;
}
-int ocfs2_info_handle_label(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_label(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_label oil;
@@ -241,8 +242,8 @@ bail:
return status;
}
-int ocfs2_info_handle_uuid(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_uuid(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_uuid oiu;
@@ -266,8 +267,8 @@ bail:
return status;
}
-int ocfs2_info_handle_fs_features(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_fs_features(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_fs_features oif;
@@ -293,8 +294,8 @@ bail:
return status;
}
-int ocfs2_info_handle_journal_size(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_journal_size(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_journal_size oij;
@@ -318,9 +319,10 @@ bail:
return status;
}
-int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
- struct inode *inode_alloc, u64 blkno,
- struct ocfs2_info_freeinode *fi, u32 slot)
+static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
+ struct inode *inode_alloc, u64 blkno,
+ struct ocfs2_info_freeinode *fi,
+ u32 slot)
{
int status = 0, unlock = 0;
@@ -365,8 +367,8 @@ bail:
return status;
}
-int ocfs2_info_handle_freeinode(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_freeinode(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
u32 i;
u64 blkno = -1;
@@ -412,11 +414,12 @@ int ocfs2_info_handle_freeinode(struct inode *inode,
}
status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i);
- if (status < 0)
- goto bail;
iput(inode_alloc);
inode_alloc = NULL;
+
+ if (status < 0)
+ goto bail;
}
o2info_set_request_filled(&oifi->ifi_req);
@@ -460,19 +463,19 @@ static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats,
stats->ffs_free_chunks_real++;
}
-void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg,
- unsigned int chunksize)
+static void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg,
+ unsigned int chunksize)
{
o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize);
o2ffg_update_stats(&(ffg->iff_ffs), chunksize);
}
-int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
- struct inode *gb_inode,
- struct ocfs2_dinode *gb_dinode,
- struct ocfs2_chain_rec *rec,
- struct ocfs2_info_freefrag *ffg,
- u32 chunks_in_group)
+static int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
+ struct inode *gb_inode,
+ struct ocfs2_dinode *gb_dinode,
+ struct ocfs2_chain_rec *rec,
+ struct ocfs2_info_freefrag *ffg,
+ u32 chunks_in_group)
{
int status = 0, used;
u64 blkno;
@@ -570,9 +573,9 @@ bail:
return status;
}
-int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
- struct inode *gb_inode, u64 blkno,
- struct ocfs2_info_freefrag *ffg)
+static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
+ struct inode *gb_inode, u64 blkno,
+ struct ocfs2_info_freefrag *ffg)
{
u32 chunks_in_group;
int status = 0, unlock = 0, i;
@@ -650,8 +653,8 @@ bail:
return status;
}
-int ocfs2_info_handle_freefrag(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_freefrag(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
u64 blkno = -1;
char namebuf[40];
@@ -721,8 +724,8 @@ out_err:
return status;
}
-int ocfs2_info_handle_unknown(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_unknown(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_request oir;
@@ -750,8 +753,8 @@ bail:
* - distinguish different requests.
* - validate size of different requests.
*/
-int ocfs2_info_handle_request(struct inode *inode,
- struct ocfs2_info_request __user *req)
+static int ocfs2_info_handle_request(struct inode *inode,
+ struct ocfs2_info_request __user *req)
{
int status = -EFAULT;
struct ocfs2_info_request oir;
@@ -809,8 +812,8 @@ bail:
return status;
}
-int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
- u64 *req_addr, int compat_flag)
+static int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
+ u64 *req_addr, int compat_flag)
{
int status = -EFAULT;
u64 __user *bp = NULL;
@@ -847,8 +850,8 @@ bail:
* a better backward&forward compatibility, since a small piece of
* request will be less likely to be broken if disk layout get changed.
*/
-int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
- int compat_flag)
+static int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
+ int compat_flag)
{
int i, status = 0;
u64 req_addr;
@@ -966,15 +969,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FITRIM:
{
struct super_block *sb = inode->i_sb;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&range, argp, sizeof(range)))
return -EFAULT;
+ range.minlen = max_t(u64, q->limits.discard_granularity,
+ range.minlen);
ret = ocfs2_trim_fs(sb, &range);
if (ret < 0)
return ret;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 44fc3e530c3..4b0c68849b3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -30,6 +30,7 @@
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/random.h>
+#include <linux/delay.h>
#include <cluster/masklog.h>
@@ -2132,12 +2133,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
iter = oi->ip_next_orphan;
spin_lock(&oi->ip_lock);
- /* The remote delete code may have set these on the
- * assumption that the other node would wipe them
- * successfully. If they are still in the node's
- * orphan dir, we need to reset that state. */
- oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
-
/* Set the proper information to get us going into
* ocfs2_delete_inode. */
oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
@@ -2191,8 +2186,20 @@ static int ocfs2_commit_thread(void *arg)
|| kthread_should_stop());
status = ocfs2_commit_cache(osb);
- if (status < 0)
- mlog_errno(status);
+ if (status < 0) {
+ static unsigned long abort_warn_time;
+
+ /* Warn about this once per minute */
+ if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
+ mlog(ML_ERROR, "status = %d, journal is "
+ "already aborted.\n", status);
+ /*
+ * After ocfs2_commit_cache() fails, j_num_trans has a
+ * non-zero value. Sleep here to avoid a busy-wait
+ * loop.
+ */
+ msleep_interruptible(1000);
+ }
if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
mlog(ML_KTHREAD,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 0b479bab367..7f8cde94abf 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -524,8 +524,7 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
* the result may be wrong.
*/
static inline int ocfs2_calc_extend_credits(struct super_block *sb,
- struct ocfs2_extent_list *root_el,
- u32 bits_wanted)
+ struct ocfs2_extent_list *root_el)
{
int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
@@ -627,4 +626,15 @@ static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
new_size);
}
+static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
+ struct inode *inode,
+ int datasync)
+{
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+ oi->i_sync_tid = handle->h_transaction->t_tid;
+ if (datasync)
+ oi->i_datasync_tid = handle->h_transaction->t_tid;
+}
+
#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index cd5496b7a0a..04401345562 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -781,6 +781,48 @@ bail:
return status;
}
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 bit_off,
+ u32 num_bits)
+{
+ int status, start;
+ u32 clear_bits;
+ struct inode *local_alloc_inode;
+ void *bitmap;
+ struct ocfs2_dinode *alloc;
+ struct ocfs2_local_alloc *la;
+
+ BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
+
+ local_alloc_inode = ac->ac_inode;
+ alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+ la = OCFS2_LOCAL_ALLOC(alloc);
+
+ bitmap = la->la_bitmap;
+ start = bit_off - le32_to_cpu(la->la_bm_off);
+ clear_bits = num_bits;
+
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(local_alloc_inode),
+ osb->local_alloc_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ while (clear_bits--)
+ ocfs2_clear_bit(start++, bitmap);
+
+ le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
+ ocfs2_journal_dirty(handle, osb->local_alloc_bh);
+
+bail:
+ return status;
+}
+
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
{
u32 count;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 1be9b586446..44a7d1fb2de 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
u32 *bit_off,
u32 *num_bits);
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 bit_off,
+ u32 num_bits);
+
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters);
void ocfs2_la_enable_worker(struct work_struct *work);
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index e57c804069e..6b6d092b099 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -82,6 +82,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
}
ret = flock_lock_file_wait(file, fl);
+ if (ret)
+ ocfs2_file_unlock(file);
out:
mutex_unlock(&fp->fp_mutex);
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 3d3f3c83065..599eb4c4c8b 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -151,6 +151,7 @@ static int __ocfs2_move_extent(handle_t *handle,
old_blkno, len);
}
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
out:
ocfs2_free_path(path);
return ret;
@@ -201,8 +202,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
}
}
- *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el,
- clusters_to_move + 2);
+ *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
extra_blocks, clusters_to_move, *credits);
@@ -562,83 +562,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
- handle_t *handle,
- struct buffer_head *di_bh,
- u32 num_bits,
- u16 chain)
-{
- int ret;
- u32 tmp_used;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
- struct ocfs2_chain_list *cl =
- (struct ocfs2_chain_list *) &di->id2.i_chain;
-
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
- di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
- le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
- ocfs2_journal_dirty(handle, di_bh);
-
-out:
- return ret;
-}
-
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits)
-{
- int status;
- void *bitmap = bg->bg_bitmap;
- int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
-
- /* All callers get the descriptor via
- * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
- BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
- BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
-
- mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
- num_bits);
-
- if (ocfs2_is_cluster_bitmap(alloc_inode))
- journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
- status = ocfs2_journal_access_gd(handle,
- INODE_CACHE(alloc_inode),
- group_bh,
- journal_type);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
- if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
- ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
- " count %u but claims %u are freed. num_bits %d",
- (unsigned long long)le64_to_cpu(bg->bg_blkno),
- le16_to_cpu(bg->bg_bits),
- le16_to_cpu(bg->bg_free_bits_count), num_bits);
- return -EROFS;
- }
- while (num_bits--)
- ocfs2_set_bit(bit_off++, bitmap);
-
- ocfs2_journal_dirty(handle, group_bh);
-
-bail:
- return status;
-}
-
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
u32 len, int ext_flags)
@@ -768,8 +691,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
goal_bit, len);
- if (ret)
+ if (ret) {
+ ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
+ le16_to_cpu(gd->bg_chain));
mlog_errno(ret);
+ }
/*
* Here we should write the new page out first if we are
@@ -1035,6 +961,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
inode->i_ctime = CURRENT_TIME;
di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
@@ -1067,8 +994,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
if (status)
return status;
- if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
+ if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) {
+ status = -EPERM;
goto out_drop;
+ }
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
status = -EPERM;
@@ -1090,8 +1019,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
goto out_free;
}
- if (range.me_start > i_size_read(inode))
+ if (range.me_start > i_size_read(inode)) {
+ status = -EINVAL;
goto out_free;
+ }
if (range.me_start + range.me_len > i_size_read(inode))
range.me_len = i_size_read(inode) - range.me_start;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be3f8676a43..8add6f1030d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
return inode;
}
+static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
+ struct dentry *dentry, struct inode *inode)
+{
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+ ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ BUG_ON(dl->dl_count != 1);
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = NULL;
+ spin_unlock(&dentry_attach_lock);
+ kfree(dl);
+ iput(inode);
+}
+
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -230,6 +245,8 @@ static int ocfs2_mknod(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
+ struct posix_acl *default_acl = NULL, *acl = NULL;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -331,6 +348,12 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ status = posix_acl_create(dir, &mode, &default_acl, &acl);
+ if (status) {
+ mlog_errno(status);
+ goto leave;
+ }
+
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),
xattr_credits));
@@ -379,8 +402,17 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
- status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
- meta_ac, data_ac);
+ if (default_acl) {
+ status = ocfs2_set_acl(handle, inode, new_fe_bh,
+ ACL_TYPE_DEFAULT, default_acl,
+ meta_ac, data_ac);
+ }
+ if (!status && acl) {
+ status = ocfs2_set_acl(handle, inode, new_fe_bh,
+ ACL_TYPE_ACCESS, acl,
+ meta_ac, data_ac);
+ }
+
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -407,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
&lookup);
@@ -419,6 +453,10 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
+ if (default_acl)
+ posix_acl_release(default_acl);
+ if (acl)
+ posix_acl_release(acl);
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
if (handle)
@@ -430,7 +468,6 @@ leave:
brelse(new_fe_bh);
brelse(parent_fe_bh);
- kfree(si.name);
kfree(si.value);
ocfs2_free_dir_lookup_result(&lookup);
@@ -450,6 +487,9 @@ leave:
* ocfs2_delete_inode will mutex_lock again.
*/
if ((status < 0) && inode) {
+ if (dl)
+ ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
@@ -475,6 +515,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
struct ocfs2_dinode *fe = NULL;
struct ocfs2_extent_list *fel;
u16 feat;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
*new_fe_bh = NULL;
@@ -489,7 +530,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
if (!*new_fe_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto leave;
}
@@ -556,8 +597,8 @@ static int __ocfs2_mknod_locked(struct inode *dir,
mlog_errno(status);
}
- status = 0; /* error in ocfs2_create_new_inode_locks is not
- * critical */
+ oi->i_sync_tid = handle->h_transaction->t_tid;
+ oi->i_datasync_tid = handle->h_transaction->t_tid;
leave:
if (status < 0) {
@@ -644,6 +685,7 @@ static int ocfs2_link(struct dentry *old_dentry,
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
+ u64 old_de_ino;
trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
old_dentry->d_name.len, old_dentry->d_name.name,
@@ -666,6 +708,22 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out;
}
+ err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+ old_dentry->d_name.len, &old_de_ino);
+ if (err) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * Check whether another node removed the source inode while we
+ * were in the vfs.
+ */
+ if (old_de_ino != OCFS2_I(inode)->ip_blkno) {
+ err = -ENOENT;
+ goto out;
+ }
+
err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
dentry->d_name.len);
if (err)
@@ -948,12 +1006,71 @@ leave:
ocfs2_free_dir_lookup_result(&orphan_insert);
ocfs2_free_dir_lookup_result(&lookup);
- if (status && (status != -ENOTEMPTY))
+ if (status && (status != -ENOTEMPTY) && (status != -ENOENT))
mlog_errno(status);
return status;
}
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+ u64 src_inode_no, u64 dest_inode_no)
+{
+ int ret = 0, i = 0;
+ u64 parent_inode_no = 0;
+ u64 child_inode_no = src_inode_no;
+ struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+ while (1) {
+ child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ break;
+ }
+
+ ret = ocfs2_inode_lock(child_inode, NULL, 0);
+ if (ret < 0) {
+ iput(child_inode);
+ if (ret != -ENOENT)
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+ &parent_inode_no);
+ ocfs2_inode_unlock(child_inode, 0);
+ iput(child_inode);
+ if (ret < 0) {
+ ret = -ENOENT;
+ break;
+ }
+
+ if (parent_inode_no == dest_inode_no) {
+ ret = 1;
+ break;
+ }
+
+ if (parent_inode_no == osb->root_inode->i_ino) {
+ ret = 0;
+ break;
+ }
+
+ child_inode_no = parent_inode_no;
+
+ if (++i >= MAX_LOOKUP_TIMES) {
+ mlog(ML_NOTICE, "max lookup times reached, filesystem "
+ "may have nested directories, "
+ "src inode: %llu, dest inode: %llu.\n",
+ (unsigned long long)src_inode_no,
+ (unsigned long long)dest_inode_no);
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* The only place this should be used is rename!
* if they have the same id, then the 1st one is the only one locked.
@@ -965,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
struct inode *inode2)
{
int status;
+ int inode1_is_ancestor, inode2_is_ancestor;
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
struct buffer_head **tmpbh;
@@ -978,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
if (*bh2)
*bh2 = NULL;
- /* we always want to lock the one with the lower lockid first. */
+ /* we always want to lock the one with the lower lockid first.
+ * and if they are nested, we lock ancestor first */
if (oi1->ip_blkno != oi2->ip_blkno) {
- if (oi1->ip_blkno < oi2->ip_blkno) {
+ inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+ oi1->ip_blkno);
+ if (inode1_is_ancestor < 0) {
+ status = inode1_is_ancestor;
+ goto bail;
+ }
+
+ inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+ oi2->ip_blkno);
+ if (inode2_is_ancestor < 0) {
+ status = inode2_is_ancestor;
+ goto bail;
+ }
+
+ if ((inode1_is_ancestor == 1) ||
+ (oi1->ip_blkno < oi2->ip_blkno &&
+ inode2_is_ancestor == 0)) {
/* switch id1 and id2 around */
tmpbh = bh2;
bh2 = bh1;
@@ -1061,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
struct ocfs2_dir_lookup_result target_insert = { NULL, };
+ bool should_add_orphan = false;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1097,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
rename_lock = 1;
+
+ /* here we cannot guarantee the inodes haven't just been
+ * changed, so check if they are nested again */
+ status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+ old_inode->i_ino);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ } else if (status == 1) {
+ status = -EPERM;
+ trace_ocfs2_rename_not_permitted(
+ (unsigned long long)old_inode->i_ino,
+ (unsigned long long)new_dir->i_ino);
+ goto bail;
+ }
}
/* if old and new are the same, this'll just do one lock. */
@@ -1267,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
mlog_errno(status);
goto bail;
}
+ should_add_orphan = true;
}
} else {
BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1311,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
- if (S_ISDIR(new_inode->i_mode) ||
- (ocfs2_read_links_count(newfe) == 1)) {
- status = ocfs2_orphan_add(osb, handle, new_inode,
- newfe_bh, orphan_name,
- &orphan_insert, orphan_dir);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
-
/* change the dirent to point to the correct inode */
status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
old_inode);
@@ -1336,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
else
ocfs2_add_links_count(newfe, -1);
ocfs2_journal_dirty(handle, newfe_bh);
+ if (should_add_orphan) {
+ status = ocfs2_orphan_add(osb, handle, new_inode,
+ newfe_bh, orphan_name,
+ &orphan_insert, orphan_dir);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
} else {
/* if the name was not found in new_dir, add it now */
status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1605,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_symlink_begin(dir, dentry, symname,
dentry->d_name.len, dentry->d_name.name);
@@ -1793,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
&lookup);
@@ -1818,7 +1971,6 @@ bail:
brelse(new_fe_bh);
brelse(parent_fe_bh);
- kfree(si.name);
kfree(si.value);
ocfs2_free_dir_lookup_result(&lookup);
if (inode_ac)
@@ -1828,6 +1980,9 @@ bail:
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if ((status < 0) && inode) {
+ if (dl)
+ ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
@@ -2444,6 +2599,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
di->i_orphaned_slot = 0;
set_nlink(inode, 1);
ocfs2_set_links_count(di, inode->i_nlink);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, di_bh);
status = ocfs2_add_entry(handle, dentry, inode,
@@ -2504,4 +2660,5 @@ const struct inode_operations ocfs2_dir_iops = {
.removexattr = generic_removexattr,
.fiemap = ocfs2_fiemap,
.get_acl = ocfs2_iop_get_acl,
+ .set_acl = ocfs2_iop_set_acl,
};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3a903470c79..bbec539230f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -30,6 +30,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
@@ -274,19 +275,16 @@ enum ocfs2_mount_options
OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
};
-#define OCFS2_OSB_SOFT_RO 0x0001
-#define OCFS2_OSB_HARD_RO 0x0002
-#define OCFS2_OSB_ERROR_FS 0x0004
-#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
-
-#define OCFS2_DEFAULT_ATIME_QUANTUM 60
+#define OCFS2_OSB_SOFT_RO 0x0001
+#define OCFS2_OSB_HARD_RO 0x0002
+#define OCFS2_OSB_ERROR_FS 0x0004
+#define OCFS2_DEFAULT_ATIME_QUANTUM 60
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_replay_map;
struct ocfs2_quota_recovery;
-struct ocfs2_dentry_lock;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -387,6 +385,7 @@ struct ocfs2_super
u8 osb_stackflags;
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+ char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1];
struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
@@ -413,10 +412,9 @@ struct ocfs2_super
struct list_head blocked_lock_list;
unsigned long blocked_lock_count;
- /* List of dentry locks to release. Anyone can add locks to
- * the list, ocfs2_wq processes the list */
- struct ocfs2_dentry_lock *dentry_lock_list;
- struct work_struct dentry_lock_work;
+ /* List of dquot structures to drop last reference to */
+ struct llist_head dquot_drop_list;
+ struct work_struct dquot_drop_work;
wait_queue_head_t osb_mount_event;
@@ -424,6 +422,7 @@ struct ocfs2_super
struct inode *osb_tl_inode;
struct buffer_head *osb_tl_bh;
struct delayed_work osb_truncate_log_wq;
+ atomic_t osb_tl_disable;
/*
* How many clusters in our truncate log.
* It must be protected by osb_tl_inode->i_mutex.
@@ -448,6 +447,8 @@ struct ocfs2_super
/* rb tree root for refcount lock. */
struct rb_root osb_rf_lock_tree;
struct ocfs2_refcount_tree *osb_ref_tree_lru;
+
+ struct mutex system_file_mutex;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
@@ -578,18 +579,6 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
spin_unlock(&osb->osb_lock);
}
-
-static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
- unsigned long flag)
-{
- unsigned long ret;
-
- spin_lock(&osb->osb_lock);
- ret = osb->osb_flags & flag;
- spin_unlock(&osb->osb_lock);
- return ret;
-}
-
static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
int hard)
{
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 1b60c62aa9d..6cb019b7c6a 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
__entry->new_len, __get_str(new_name))
);
+DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
+
TRACE_EVENT(ocfs2_rename_target_exists,
TP_PROTO(int new_len, const char *new_name),
TP_ARGS(new_len, new_name),
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index d5ab56cbe5c..f266d67df3c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -28,6 +28,7 @@ struct ocfs2_dquot {
unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
s64 dq_origspace; /* Last globally synced space usage */
s64 dq_originodes; /* Last globally synced inode usage */
+ struct llist_node list; /* Member of list of dquots to drop */
};
/* Description of one chunk to recover in memory */
@@ -110,6 +111,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
int ocfs2_create_local_dquot(struct dquot *dquot);
int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot);
int ocfs2_local_write_dquot(struct dquot *dquot);
+void ocfs2_drop_dquot_refs(struct work_struct *work);
extern const struct dquot_operations ocfs2_quota_operations;
extern struct quota_format_type ocfs2_quota_format;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index aaa50611ec6..b990a62cff5 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -10,6 +10,7 @@
#include <linux/jiffies.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
+#include <linux/llist.h>
#include <cluster/masklog.h>
@@ -679,6 +680,27 @@ static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
OCFS2_INODE_UPDATE_CREDITS;
}
+void ocfs2_drop_dquot_refs(struct work_struct *work)
+{
+ struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+ dquot_drop_work);
+ struct llist_node *list;
+ struct ocfs2_dquot *odquot, *next_odquot;
+
+ list = llist_del_all(&osb->dquot_drop_list);
+ llist_for_each_entry_safe(odquot, next_odquot, list, list) {
+ /* Drop the reference we acquired in ocfs2_dquot_release() */
+ dqput(&odquot->dq_dquot);
+ }
+}
+
+/*
+ * Called when the last reference to dquot is dropped. If we are called from
+ * downconvert thread, we cannot do all the handling here because grabbing
+ * quota lock could deadlock (the node holding the quota lock could need some
+ * other cluster lock to proceed but with blocked downconvert thread we cannot
+ * release any lock).
+ */
static int ocfs2_release_dquot(struct dquot *dquot)
{
handle_t *handle;
@@ -694,6 +716,19 @@ static int ocfs2_release_dquot(struct dquot *dquot)
/* Check whether we are not racing with some other dqget() */
if (atomic_read(&dquot->dq_count) > 1)
goto out;
+ /* Running from downconvert thread? Postpone quota processing to wq */
+ if (current == osb->dc_task) {
+ /*
+ * Grab our own reference to dquot and queue it for delayed
+ * dropping. Quota code rechecks after calling
+ * ->release_dquot() and won't free dquot structure.
+ */
+ dqgrab(dquot);
+ /* First entry on list -> queue work */
+ if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list))
+ queue_work(ocfs2_wq, &osb->dquot_drop_work);
+ goto out;
+ }
status = ocfs2_lock_global_qf(oinfo, 1);
if (status < 0)
goto out;
@@ -717,6 +752,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
*/
if (status < 0)
mlog_errno(status);
+ /*
+ * Clear dq_off so that we search for the structure in quota file next
+ * time we acquire it. The structure might be deleted and reallocated
+ * elsewhere by another node while our dquot structure is on freelist.
+ */
+ dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_trans:
ocfs2_commit_trans(osb, handle);
@@ -756,16 +797,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
- if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
- status = ocfs2_qinfo_lock(info, 0);
- if (status < 0)
- goto out_dq;
- status = qtree_read_dquot(&info->dqi_gi, dquot);
- ocfs2_qinfo_unlock(info, 0);
- if (status < 0)
- goto out_dq;
- }
- set_bit(DQ_READ_B, &dquot->dq_flags);
+ status = ocfs2_qinfo_lock(info, 0);
+ if (status < 0)
+ goto out_dq;
+ /*
+ * We always want to read dquot structure from disk because we don't
+ * know what happened with it while it was on freelist.
+ */
+ status = qtree_read_dquot(&info->dqi_gi, dquot);
+ ocfs2_qinfo_unlock(info, 0);
+ if (status < 0)
+ goto out_dq;
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 2e4344be3b9..2001862bf2b 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
- /* Clear the read bit so that next time someone uses this
- * dquot he reads fresh info from disk and allocates local
- * dquot structure */
- clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bf4dfc14bb2..636aab69ead 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -46,6 +46,7 @@
#include <linux/quotaops.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/posix_acl.h>
struct ocfs2_cow_context {
struct inode *inode;
@@ -612,6 +613,11 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
+ if (!new_bh) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out_commit;
+ }
ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
@@ -1310,7 +1316,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
new_bh = sb_getblk(sb, blkno);
if (new_bh == NULL) {
- ret = -EIO;
+ ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
@@ -1402,10 +1408,9 @@ static void swap_refcount_rec(void *a, void *b, int size)
{
struct ocfs2_refcount_rec *l = a, *r = b, tmp;
- tmp = *(struct ocfs2_refcount_rec *)l;
- *(struct ocfs2_refcount_rec *)l =
- *(struct ocfs2_refcount_rec *)r;
- *(struct ocfs2_refcount_rec *)r = tmp;
+ tmp = *l;
+ *l = *r;
+ *r = tmp;
}
/*
@@ -1561,7 +1566,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
new_bh = sb_getblk(sb, blkno);
if (new_bh == NULL) {
- ret = -EIO;
+ ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
@@ -2502,8 +2507,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
*credits += ocfs2_calc_extend_credits(sb,
- et.et_root_el,
- ref_blocks);
+ et.et_root_el);
} else {
*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
*meta_add += 1;
@@ -2874,8 +2878,7 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb,
meta_add =
ocfs2_extend_meta_needed(et->et_root_el);
- *credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
- num_clusters + 2);
+ *credits += ocfs2_calc_extend_credits(sb, et->et_root_el);
ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
p_cluster, num_clusters,
@@ -3031,7 +3034,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
for (i = 0; i < blocks; i++, old_block++, new_block++) {
new_bh = sb_getblk(osb->sb, new_block);
if (new_bh == NULL) {
- ret = -EIO;
+ ret = -ENOMEM;
mlog_errno(ret);
break;
}
@@ -3625,8 +3628,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
*credits += ocfs2_calc_extend_credits(inode->i_sb,
- et.et_root_el,
- ref_blocks);
+ et.et_root_el);
}
out:
@@ -4266,20 +4268,36 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
+ struct posix_acl *default_acl, *acl;
+ umode_t mode;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
- error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
+ mode = inode->i_mode;
+ error = posix_acl_create(dir, &mode, &default_acl, &acl);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
+ error = ocfs2_create_inode_in_orphan(dir, mode,
&new_orphan_inode);
if (error) {
mlog_errno(error);
goto out;
}
+ error = ocfs2_rw_lock(inode, 1);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
error = ocfs2_inode_lock(inode, &old_bh, 1);
if (error) {
mlog_errno(error);
+ ocfs2_rw_unlock(inode, 1);
goto out;
}
@@ -4291,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
up_write(&OCFS2_I(inode)->ip_xattr_sem);
ocfs2_inode_unlock(inode, 1);
+ ocfs2_rw_unlock(inode, 1);
brelse(old_bh);
if (error) {
@@ -4301,11 +4320,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
- &new_dentry->d_name);
+ &new_dentry->d_name,
+ default_acl, acl);
if (error)
mlog_errno(error);
}
out:
+ if (default_acl)
+ posix_acl_release(default_acl);
+ if (acl)
+ posix_acl_release(acl);
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ec55add7604..d5da6f62414 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -53,8 +53,6 @@
*/
static u16 ocfs2_calc_new_backup_super(struct inode *inode,
struct ocfs2_group_desc *gd,
- int new_clusters,
- u32 first_new_cluster,
u16 cl_cpg,
int set)
{
@@ -127,8 +125,6 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
backups = ocfs2_calc_new_backup_super(bm_inode,
group,
- new_clusters,
- first_new_cluster,
cl_cpg, 1);
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
@@ -157,7 +153,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
spin_lock(&OCFS2_I(bm_inode)->ip_lock);
OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
- le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits);
+ le64_add_cpu(&fe->i_size, (u64)new_clusters << osb->s_clustersize_bits);
spin_unlock(&OCFS2_I(bm_inode)->ip_lock);
i_size_write(bm_inode, le64_to_cpu(fe->i_size));
@@ -167,8 +163,6 @@ out_rollback:
if (ret < 0) {
ocfs2_calc_new_backup_super(bm_inode,
group,
- new_clusters,
- first_new_cluster,
cl_cpg, 0);
le16_add_cpu(&group->bg_free_bits_count, backups);
le16_add_cpu(&group->bg_bits, -1 * num_bits);
@@ -469,6 +463,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
struct ocfs2_chain_list *cl;
struct ocfs2_chain_rec *cr;
u16 cl_bpc;
+ u64 bg_ptr;
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
@@ -513,7 +508,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
if (ret) {
mlog_errno(ret);
- goto out_unlock;
+ goto out_free_group_bh;
}
trace_ocfs2_group_add((unsigned long long)input->group,
@@ -523,7 +518,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
if (IS_ERR(handle)) {
mlog_errno(PTR_ERR(handle));
ret = -EINVAL;
- goto out_unlock;
+ goto out_free_group_bh;
}
cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
@@ -538,12 +533,14 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
}
group = (struct ocfs2_group_desc *)group_bh->b_data;
+ bg_ptr = le64_to_cpu(group->bg_next_group);
group->bg_next_group = cr->c_blkno;
ocfs2_journal_dirty(handle, group_bh);
ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
+ group->bg_next_group = cpu_to_le64(bg_ptr);
mlog_errno(ret);
goto out_commit;
}
@@ -566,7 +563,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
- le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits);
+ le64_add_cpu(&fe->i_size, (u64)input->clusters << osb->s_clustersize_bits);
spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));
@@ -574,8 +571,11 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
out_commit:
ocfs2_commit_trans(osb, handle);
-out_unlock:
+
+out_free_group_bh:
brelse(group_bh);
+
+out_unlock:
brelse(main_bm_bh);
ocfs2_inode_unlock(main_bm_inode, 1);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index bf1f8930456..1724d43d3da 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
return 0;
}
-static int o2cb_cluster_this_node(unsigned int *node)
+static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node)
{
int node_num;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 286edf1e231..13a8537d8e8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/reboot.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include "stackglue.h"
@@ -102,6 +103,12 @@
#define OCFS2_TEXT_UUID_LEN 32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
+#define VERSION_LOCK "version_lock"
+
+enum ocfs2_connection_type {
+ WITH_CONTROLD,
+ NO_CONTROLD
+};
/*
* ocfs2_live_connection is refcounted because the filesystem and
@@ -110,6 +117,13 @@
struct ocfs2_live_connection {
struct list_head oc_list;
struct ocfs2_cluster_connection *oc_conn;
+ enum ocfs2_connection_type oc_type;
+ atomic_t oc_this_node;
+ int oc_our_slot;
+ struct dlm_lksb oc_version_lksb;
+ char oc_lvb[DLM_LVB_LEN];
+ struct completion oc_sync_wait;
+ wait_queue_head_t oc_wait;
};
struct ocfs2_control_private {
@@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
* mount path. Since the VFS prevents multiple calls to
* fill_super(), we can't get dupes here.
*/
-static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
- struct ocfs2_live_connection **c_ret)
+static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
+ struct ocfs2_live_connection *c)
{
int rc = 0;
- struct ocfs2_live_connection *c;
-
- c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
mutex_lock(&ocfs2_control_lock);
c->oc_conn = conn;
- if (atomic_read(&ocfs2_control_opened))
+ if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened))
list_add(&c->oc_list, &ocfs2_live_connection_list);
else {
printk(KERN_ERR
@@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
}
mutex_unlock(&ocfs2_control_lock);
-
- if (!rc)
- *c_ret = c;
- else
- kfree(c);
-
return rc;
}
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
return 0;
}
+static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
+{
+ struct ocfs2_protocol_version *pv =
+ (struct ocfs2_protocol_version *)lvb;
+ /*
+ * ocfs2_protocol_version has two u8 variables, so we don't
+ * need any endian conversion.
+ */
+ ver->pv_major = pv->pv_major;
+ ver->pv_minor = pv->pv_minor;
+}
+
+static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
+{
+ struct ocfs2_protocol_version *pv =
+ (struct ocfs2_protocol_version *)lvb;
+ /*
+ * ocfs2_protocol_version has two u8 variables, so we don't
+ * need any endian conversion.
+ */
+ pv->pv_major = ver->pv_major;
+ pv->pv_minor = ver->pv_minor;
+}
+
+static void sync_wait_cb(void *arg)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ complete(&lc->oc_sync_wait);
+}
+
+static int sync_unlock(struct ocfs2_cluster_connection *conn,
+ struct dlm_lksb *lksb, char *name)
+{
+ int error;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn);
+ if (error) {
+ printk(KERN_ERR "%s lkid %x error %d\n",
+ name, lksb->sb_lkid, error);
+ return error;
+ }
+
+ wait_for_completion(&lc->oc_sync_wait);
+
+ if (lksb->sb_status != -DLM_EUNLOCK) {
+ printk(KERN_ERR "%s lkid %x status %d\n",
+ name, lksb->sb_lkid, lksb->sb_status);
+ return -1;
+ }
+ return 0;
+}
+
+static int sync_lock(struct ocfs2_cluster_connection *conn,
+ int mode, uint32_t flags,
+ struct dlm_lksb *lksb, char *name)
+{
+ int error, status;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ error = dlm_lock(conn->cc_lockspace, mode, lksb, flags,
+ name, strlen(name),
+ 0, sync_wait_cb, conn, NULL);
+ if (error) {
+ printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n",
+ name, lksb->sb_lkid, flags, mode, error);
+ return error;
+ }
+
+ wait_for_completion(&lc->oc_sync_wait);
+
+ status = lksb->sb_status;
+
+ if (status && status != -EAGAIN) {
+ printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n",
+ name, lksb->sb_lkid, flags, mode, status);
+ }
+
+ return status;
+}
+
+
+static int version_lock(struct ocfs2_cluster_connection *conn, int mode,
+ int flags)
+{
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ return sync_lock(conn, mode, flags,
+ &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+static int version_unlock(struct ocfs2_cluster_connection *conn)
+{
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+/* get_protocol_version()
+ *
+ * To exchange ocfs2 versioning, we use the LVB of the version dlm lock.
+ * The algorithm is:
+ * 1. Attempt to take the lock in EX mode (non-blocking).
+ * 2. If successful (which means it is the first mount), write the
+ * version number and downconvert to PR lock.
+ * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after
+ * taking the PR lock.
+ */
+
+static int get_protocol_version(struct ocfs2_cluster_connection *conn)
+{
+ int ret;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ struct ocfs2_protocol_version pv;
+
+ running_proto.pv_major =
+ ocfs2_user_plugin.sp_max_proto.pv_major;
+ running_proto.pv_minor =
+ ocfs2_user_plugin.sp_max_proto.pv_minor;
+
+ lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb;
+ ret = version_lock(conn, DLM_LOCK_EX,
+ DLM_LKF_VALBLK|DLM_LKF_NOQUEUE);
+ if (!ret) {
+ conn->cc_version.pv_major = running_proto.pv_major;
+ conn->cc_version.pv_minor = running_proto.pv_minor;
+ version_to_lvb(&running_proto, lc->oc_lvb);
+ version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+ } else if (ret == -EAGAIN) {
+ ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK);
+ if (ret)
+ goto out;
+ lvb_to_version(lc->oc_lvb, &pv);
+
+ if ((pv.pv_major != running_proto.pv_major) ||
+ (pv.pv_minor > running_proto.pv_minor)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ conn->cc_version.pv_major = pv.pv_major;
+ conn->cc_version.pv_minor = pv.pv_minor;
+ }
+out:
+ return ret;
+}
+
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+ slot->nodeid, slot->slot);
+ conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+ int num_slots, int our_slot,
+ uint32_t generation)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ int i;
+
+ for (i = 0; i < num_slots; i++)
+ if (slots[i].slot == our_slot) {
+ atomic_set(&lc->oc_this_node, slots[i].nodeid);
+ break;
+ }
+
+ lc->oc_our_slot = our_slot;
+ wake_up(&lc->oc_wait);
+}
+
+static const struct dlm_lockspace_ops ocfs2_ls_ops = {
+ .recover_prep = user_recover_prep,
+ .recover_slot = user_recover_slot,
+ .recover_done = user_recover_done,
+};
+
+static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
+{
+ version_unlock(conn);
+ dlm_release_lockspace(conn->cc_lockspace, 2);
+ conn->cc_lockspace = NULL;
+ ocfs2_live_connection_drop(conn->cc_private);
+ conn->cc_private = NULL;
+ return 0;
+}
+
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
- struct ocfs2_live_connection *uninitialized_var(control);
- int rc = 0;
+ struct ocfs2_live_connection *lc;
+ int rc, ops_rv;
BUG_ON(conn == NULL);
- rc = ocfs2_live_connection_new(conn, &control);
+ lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
+ if (!lc) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ init_waitqueue_head(&lc->oc_wait);
+ init_completion(&lc->oc_sync_wait);
+ atomic_set(&lc->oc_this_node, 0);
+ conn->cc_private = lc;
+ lc->oc_type = NO_CONTROLD;
+
+ rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
+ DLM_LSFL_FS, DLM_LVB_LEN,
+ &ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
+ if (rc)
+ goto out;
+
+ if (ops_rv == -EOPNOTSUPP) {
+ lc->oc_type = WITH_CONTROLD;
+ printk(KERN_NOTICE "ocfs2: You seem to be using an older "
+ "version of dlm_controld and/or ocfs2-tools."
+ " Please consider upgrading.\n");
+ } else if (ops_rv) {
+ rc = ops_rv;
+ goto out;
+ }
+ conn->cc_lockspace = fsdlm;
+
+ rc = ocfs2_live_connection_attach(conn, lc);
if (rc)
goto out;
+ if (lc->oc_type == NO_CONTROLD) {
+ rc = get_protocol_version(conn);
+ if (rc) {
+ printk(KERN_ERR "ocfs2: Could not determine"
+ " locking version\n");
+ user_cluster_disconnect(conn);
+ goto out;
+ }
+ wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
+ }
+
/*
* running_proto must have been set before we allowed any mounts
* to proceed.
@@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
printk(KERN_ERR
"Unable to mount with fs locking protocol version "
- "%u.%u because the userspace control daemon has "
- "negotiated %u.%u\n",
+ "%u.%u because negotiated protocol is %u.%u\n",
conn->cc_version.pv_major, conn->cc_version.pv_minor,
running_proto.pv_major, running_proto.pv_minor);
rc = -EPROTO;
- ocfs2_live_connection_drop(control);
- goto out;
- }
-
- rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
- NULL, NULL, NULL, &fsdlm);
- if (rc) {
- ocfs2_live_connection_drop(control);
- goto out;
+ ocfs2_live_connection_drop(lc);
+ lc = NULL;
}
- conn->cc_private = control;
- conn->cc_lockspace = fsdlm;
out:
+ if (rc && lc)
+ kfree(lc);
return rc;
}
-static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
-{
- dlm_release_lockspace(conn->cc_lockspace, 2);
- conn->cc_lockspace = NULL;
- ocfs2_live_connection_drop(conn->cc_private);
- conn->cc_private = NULL;
- return 0;
-}
-static int user_cluster_this_node(unsigned int *this_node)
+static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *this_node)
{
int rc;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ if (lc->oc_type == WITH_CONTROLD)
+ rc = ocfs2_control_get_this_node();
+ else if (lc->oc_type == NO_CONTROLD)
+ rc = atomic_read(&lc->oc_this_node);
+ else
+ rc = -EINVAL;
- rc = ocfs2_control_get_this_node();
if (rc < 0)
return rc;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 39abf89697e..5d965e83bd4 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
EXPORT_SYMBOL_GPL(ocfs2_plock);
int ocfs2_cluster_connect(const char *stack_name,
+ const char *cluster_name,
+ int cluster_name_len,
const char *group,
int grouplen,
struct ocfs2_locking_protocol *lproto,
@@ -342,8 +344,12 @@ int ocfs2_cluster_connect(const char *stack_name,
goto out;
}
- memcpy(new_conn->cc_name, group, grouplen);
+ strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
new_conn->cc_namelen = grouplen;
+ if (cluster_name_len)
+ strlcpy(new_conn->cc_cluster_name, cluster_name,
+ CLUSTER_NAME_MAX + 1);
+ new_conn->cc_cluster_name_len = cluster_name_len;
new_conn->cc_recovery_handler = recovery_handler;
new_conn->cc_recovery_data = recovery_data;
@@ -386,8 +392,9 @@ int ocfs2_cluster_connect_agnostic(const char *group,
if (cluster_stack_name[0])
stack_name = cluster_stack_name;
- return ocfs2_cluster_connect(stack_name, group, grouplen, lproto,
- recovery_handler, recovery_data, conn);
+ return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
+ lproto, recovery_handler, recovery_data,
+ conn);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
@@ -460,9 +467,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen)
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
-int ocfs2_cluster_this_node(unsigned int *node)
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node)
{
- return active_stack->sp_ops->this_node(node);
+ return active_stack->sp_ops->this_node(conn, node);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
@@ -488,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
}
static struct kobj_attribute ocfs2_attr_max_locking_protocol =
- __ATTR(max_locking_protocol, S_IFREG | S_IRUGO,
+ __ATTR(max_locking_protocol, S_IRUGO,
ocfs2_max_locking_protocol_show, NULL);
static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
@@ -520,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
}
static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
- __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO,
+ __ATTR(loaded_cluster_plugins, S_IRUGO,
ocfs2_loaded_cluster_plugins_show, NULL);
static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
@@ -542,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
}
static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
- __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO,
+ __ATTR(active_cluster_plugin, S_IRUGO,
ocfs2_active_cluster_plugin_show, NULL);
static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
@@ -591,15 +599,29 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
static struct kobj_attribute ocfs2_attr_cluster_stack =
- __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR,
+ __ATTR(cluster_stack, S_IRUGO | S_IWUSR,
ocfs2_cluster_stack_show,
ocfs2_cluster_stack_store);
+
+
+static ssize_t ocfs2_dlm_recover_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "1\n");
+}
+
+static struct kobj_attribute ocfs2_attr_dlm_recover_support =
+ __ATTR(dlm_recover_callback_support, S_IRUGO,
+ ocfs2_dlm_recover_show, NULL);
+
static struct attribute *ocfs2_attrs[] = {
&ocfs2_attr_max_locking_protocol.attr,
&ocfs2_attr_loaded_cluster_plugins.attr,
&ocfs2_attr_active_cluster_plugin.attr,
&ocfs2_attr_cluster_stack.attr,
+ &ocfs2_attr_dlm_recover_support.attr,
NULL,
};
@@ -643,7 +665,7 @@ error:
#define FS_OCFS2_NM 1
-static ctl_table ocfs2_nm_table[] = {
+static struct ctl_table ocfs2_nm_table[] = {
{
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
@@ -654,7 +676,7 @@ static ctl_table ocfs2_nm_table[] = {
{ }
};
-static ctl_table ocfs2_mod_table[] = {
+static struct ctl_table ocfs2_mod_table[] = {
{
.procname = "nm",
.data = NULL,
@@ -665,7 +687,7 @@ static ctl_table ocfs2_mod_table[] = {
{ }
};
-static ctl_table ocfs2_kern_table[] = {
+static struct ctl_table ocfs2_kern_table[] = {
{
.procname = "ocfs2",
.data = NULL,
@@ -676,7 +698,7 @@ static ctl_table ocfs2_kern_table[] = {
{ }
};
-static ctl_table ocfs2_root_table[] = {
+static struct ctl_table ocfs2_root_table[] = {
{
.procname = "fs",
.data = NULL,
@@ -687,7 +709,7 @@ static ctl_table ocfs2_root_table[] = {
{ }
};
-static struct ctl_table_header *ocfs2_table_header = NULL;
+static struct ctl_table_header *ocfs2_table_header;
/*
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 1ec56fdb8d0..66334a30cea 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -45,6 +45,9 @@ struct file_lock;
*/
#define GROUP_NAME_MAX 64
+/* This shadows OCFS2_CLUSTER_NAME_LEN */
+#define CLUSTER_NAME_MAX 16
+
/*
* ocfs2_protocol_version changes when ocfs2 does something different in
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol {
* locking compatibility.
*/
struct ocfs2_cluster_connection {
- char cc_name[GROUP_NAME_MAX];
+ char cc_name[GROUP_NAME_MAX + 1];
int cc_namelen;
+ char cc_cluster_name[CLUSTER_NAME_MAX + 1];
+ int cc_cluster_name_len;
struct ocfs2_protocol_version cc_version;
struct ocfs2_locking_protocol *cc_proto;
void (*cc_recovery_handler)(int node_num, void *recovery_data);
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations {
* ->this_node() returns the cluster's unique identifier for the
* local node.
*/
- int (*this_node)(unsigned int *node);
+ int (*this_node)(struct ocfs2_cluster_connection *conn,
+ unsigned int *node);
/*
* Call the underlying dlm lock function. The ->dlm_lock()
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin {
/* Used by the filesystem */
int ocfs2_cluster_connect(const char *stack_name,
+ const char *cluster_name,
+ int cluster_name_len,
const char *group,
int grouplen,
struct ocfs2_locking_protocol *lproto,
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group,
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
int hangup_pending);
void ocfs2_cluster_hangup(const char *group, int grouplen);
-int ocfs2_cluster_this_node(unsigned int *node);
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node);
struct ocfs2_lock_res;
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 5397c07ce60..0cb889a17ae 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
struct ocfs2_suballoc_result *res);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr);
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits);
static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
@@ -481,7 +475,7 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -661,7 +655,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -777,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
+ ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0);
status = 0;
@@ -1343,7 +1338,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
return status;
}
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
+int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
@@ -1388,8 +1383,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
ocfs2_journal_dirty(handle, group_bh);
bail:
- if (status)
- mlog_errno(status);
return status;
}
@@ -1588,7 +1581,7 @@ static int ocfs2_block_group_search(struct inode *inode,
return ret;
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
handle_t *handle,
struct buffer_head *di_bh,
u32 num_bits,
@@ -1615,6 +1608,21 @@ out:
return ret;
}
+void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain)
+{
+ u32 tmp_used;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+ struct ocfs2_chain_list *cl;
+
+ cl = (struct ocfs2_chain_list *)&di->id2.i_chain;
+ tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+ di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits);
+ le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits);
+}
+
static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
struct ocfs2_extent_rec *rec,
struct ocfs2_chain_list *cl)
@@ -1715,8 +1723,12 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
res->sr_bit_offset, res->sr_bits);
- if (ret < 0)
+ if (ret < 0) {
+ ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
+ res->sr_bits,
+ le16_to_cpu(gd->bg_chain));
mlog_errno(ret);
+ }
out_loc_only:
*bits_left = le16_to_cpu(gd->bg_free_bits_count);
@@ -1846,6 +1858,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
res->sr_bit_offset,
res->sr_bits);
if (status < 0) {
+ ocfs2_rollback_alloc_dinode_counts(alloc_inode,
+ ac->ac_bh, res->sr_bits, chain);
mlog_errno(status);
goto bail;
}
@@ -2099,7 +2113,7 @@ int ocfs2_find_new_inode_loc(struct inode *dir,
ac->ac_find_loc_priv = res;
*fe_blkno = res->sr_blkno;
-
+ ocfs2_update_inode_fsync_trans(handle, dir, 0);
out:
if (handle)
ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
@@ -2157,6 +2171,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
res->sr_bit_offset,
res->sr_bits);
if (ret < 0) {
+ ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
+ ac->ac_bh, res->sr_bits, chain);
mlog_errno(ret);
goto out;
}
@@ -2878,6 +2894,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
mutex_unlock(&inode_alloc_inode->i_mutex);
+ iput(inode_alloc_inode);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
goto bail;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a36d0aa5091..2d2501767c0 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -86,6 +86,22 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac);
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+ handle_t *handle,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain);
+void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain);
+int ocfs2_block_group_set_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct ocfs2_group_desc *bg,
+ struct buffer_head *group_bh,
+ unsigned int bit_off,
+ unsigned int num_bits);
+
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 bits_wanted,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d4e81e4a9b0..ddb662b3244 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,7 +68,6 @@
#include "super.h"
#include "sysfile.h"
#include "uptodate.h"
-#include "ver.h"
#include "xattr.h"
#include "quota.h"
#include "refcounttree.h"
@@ -76,7 +75,7 @@
#include "buffer_head_io.h"
-static struct kmem_cache *ocfs2_inode_cachep = NULL;
+static struct kmem_cache *ocfs2_inode_cachep;
struct kmem_cache *ocfs2_dquot_cachep;
struct kmem_cache *ocfs2_qf_chunk_cachep;
@@ -86,10 +85,11 @@ struct kmem_cache *ocfs2_qf_chunk_cachep;
* workqueue and schedule on our own. */
struct workqueue_struct *ocfs2_wq = NULL;
-static struct dentry *ocfs2_debugfs_root = NULL;
+static struct dentry *ocfs2_debugfs_root;
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster file system");
struct mount_options
{
@@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
if (!oi)
return NULL;
+ oi->i_sync_tid = 0;
+ oi->i_datasync_tid = 0;
+
jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
return &oi->vfs_inode;
}
@@ -631,6 +634,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
+ sync_filesystem(sb);
+
if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
!ocfs2_check_set_options(sb, &parsed_options)) {
ret = -EINVAL;
@@ -1238,30 +1243,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
}
-static void ocfs2_kill_sb(struct super_block *sb)
-{
- struct ocfs2_super *osb = OCFS2_SB(sb);
-
- /* Failed mount? */
- if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED)
- goto out;
-
- /* Prevent further queueing of inode drop events */
- spin_lock(&dentry_list_lock);
- ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
- spin_unlock(&dentry_list_lock);
- /* Wait for work to finish and/or remove it */
- cancel_work_sync(&osb->dentry_lock_work);
-out:
- kill_block_super(sb);
-}
-
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
.mount = ocfs2_mount,
- .kill_sb = ocfs2_kill_sb,
-
+ .kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
.next = NULL
};
@@ -1612,16 +1598,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
return 0;
}
-wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
-
static int __init ocfs2_init(void)
{
- int status, i;
-
- ocfs2_print_version();
-
- for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
- init_waitqueue_head(&ocfs2__ioend_wq[i]);
+ int status;
status = init_ocfs2_uptodate_cache();
if (status < 0)
@@ -1763,7 +1742,7 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_extent_map_init(&oi->vfs_inode);
INIT_LIST_HEAD(&oi->ip_io_markers);
oi->ip_dir_start_lookup = 0;
- atomic_set(&oi->ip_unaligned_aio, 0);
+ mutex_init(&oi->ip_unaligned_aio);
init_rwsem(&oi->ip_alloc_sem);
init_rwsem(&oi->ip_xattr_sem);
mutex_init(&oi->ip_io_mutex);
@@ -1848,8 +1827,8 @@ static int ocfs2_get_sector(struct super_block *sb,
*bh = sb_getblk(sb, block);
if (!*bh) {
- mlog_errno(-EIO);
- return -EIO;
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
}
lock_buffer(*bh);
if (!buffer_dirty(*bh))
@@ -1934,17 +1913,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
debugfs_remove(osb->osb_ctxt);
- /*
- * Flush inode dropping work queue so that deletes are
- * performed while the filesystem is still working
- */
- ocfs2_drop_all_dl_inodes(osb);
-
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
ocfs2_disable_quotas(osb);
+ /* All dquots should be freed by now */
+ WARN_ON(!llist_empty(&osb->dquot_drop_list));
+ /* Wait for worker to be done with the work structure in osb */
+ cancel_work_sync(&osb->dquot_drop_work);
+
ocfs2_shutdown_local_alloc(osb);
ocfs2_truncate_log_shutdown(osb);
@@ -2075,7 +2053,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL;
struct ocfs2_journal *journal;
- __le32 uuid_net_key;
struct ocfs2_super *osb;
u64 total_blocks;
@@ -2121,6 +2098,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
spin_lock_init(&osb->osb_xattr_lock);
ocfs2_init_steal_slots(osb);
+ mutex_init(&osb->system_file_mutex);
+
atomic_set(&osb->alloc_stats.moves, 0);
atomic_set(&osb->alloc_stats.local_data, 0);
atomic_set(&osb->alloc_stats.bitmap_data, 0);
@@ -2225,10 +2204,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (ocfs2_clusterinfo_valid(osb)) {
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- memcpy(osb->osb_cluster_stack,
+ strlcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN);
- osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ OCFS2_STACK_LABEL_LEN + 1);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2237,6 +2215,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
+ strlcpy(osb->osb_cluster_name,
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
+ OCFS2_CLUSTER_NAME_LEN + 1);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
@@ -2272,8 +2253,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
- INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
- osb->dentry_lock_list = NULL;
+ INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
+ init_llist_head(&osb->dquot_drop_list);
/* get some pseudo constants for clustersize bits */
osb->s_clustersize_bits =
@@ -2307,10 +2288,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
- memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
-
- strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
- osb->vol_label[63] = '\0';
+ strlcpy(osb->vol_label, di->id2.i_super.s_label,
+ OCFS2_MAX_VOL_LABEL_LEN);
osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
osb->first_cluster_group_blkno =
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index f053688d22a..af155c18312 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -113,9 +113,11 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
} else
arr = get_local_system_inode(osb, type, slot);
+ mutex_lock(&osb->system_file_mutex);
if (arr && ((inode = *arr) != NULL)) {
/* get a ref in addition to the array ref */
inode = igrab(inode);
+ mutex_unlock(&osb->system_file_mutex);
BUG_ON(!inode);
return inode;
@@ -129,6 +131,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
*arr = igrab(inode);
BUG_ON(!*arr);
}
+ mutex_unlock(&osb->system_file_mutex);
return inode;
}
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 52eaf33d346..82e17b076ce 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -67,7 +67,7 @@ struct ocfs2_meta_cache_item {
sector_t c_block;
};
-static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
+static struct kmem_cache *ocfs2_uptodate_cachep;
u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
{
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c
deleted file mode 100644
index e2488f4128a..00000000000
--- a/fs/ocfs2/ver.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define OCFS2_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
-
-void ocfs2_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(OCFS2_BUILD_VERSION);
diff --git a/fs/ocfs2/ver.h b/fs/ocfs2/ver.h
deleted file mode 100644
index d7395cb91d2..00000000000
--- a/fs/ocfs2/ver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_VER_H
-#define OCFS2_VER_H
-
-void ocfs2_print_version(void);
-
-#endif /* OCFS2_VER_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6ce0686eab7..016f01df382 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -99,8 +99,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
const struct xattr_handler *ocfs2_xattr_handlers[] = {
&ocfs2_xattr_user_handler,
- &ocfs2_xattr_acl_access_handler,
- &ocfs2_xattr_acl_default_handler,
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
&ocfs2_xattr_trusted_handler,
&ocfs2_xattr_security_handler,
NULL
@@ -109,9 +109,9 @@ const struct xattr_handler *ocfs2_xattr_handlers[] = {
static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
[OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
- = &ocfs2_xattr_acl_access_handler,
+ = &posix_acl_access_xattr_handler,
[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
- = &ocfs2_xattr_acl_default_handler,
+ = &posix_acl_default_xattr_handler,
[OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
[OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
};
@@ -369,7 +369,7 @@ static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
* them fully.
*/
static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
- u64 xb_blkno)
+ u64 xb_blkno, int new)
{
int i, rc = 0;
@@ -377,15 +377,22 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
xb_blkno + i);
if (!bucket->bu_bhs[i]) {
- rc = -EIO;
+ rc = -ENOMEM;
mlog_errno(rc);
break;
}
if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
- bucket->bu_bhs[i]))
- ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
- bucket->bu_bhs[i]);
+ bucket->bu_bhs[i])) {
+ if (new)
+ ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
+ bucket->bu_bhs[i]);
+ else {
+ set_buffer_uptodate(bucket->bu_bhs[i]);
+ ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
+ bucket->bu_bhs[i]);
+ }
+ }
}
if (rc)
@@ -754,8 +761,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
BUG_ON(why == RESTART_META);
credits = ocfs2_calc_extend_credits(inode->i_sb,
- &vb->vb_xv->xr_list,
- clusters_to_add);
+ &vb->vb_xv->xr_list);
status = ocfs2_extend_trans(handle, credits);
if (status < 0) {
status = -ENOMEM;
@@ -2603,6 +2609,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
out_commit:
@@ -2865,6 +2872,12 @@ static int ocfs2_create_xattr_block(struct inode *inode,
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
+ if (!new_bh) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto end;
+ }
+
ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
@@ -3040,8 +3053,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
clusters_add += new_clusters;
credits += ocfs2_calc_extend_credits(inode->i_sb,
- &def_xv.xv.xr_list,
- new_clusters);
+ &def_xv.xv.xr_list);
}
goto meta_guess;
@@ -3106,8 +3118,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
if (!ocfs2_xattr_is_local(xe))
credits += ocfs2_calc_extend_credits(
inode->i_sb,
- &def_xv.xv.xr_list,
- new_clusters);
+ &def_xv.xv.xr_list);
goto out;
}
}
@@ -3132,9 +3143,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
clusters_add += new_clusters - old_clusters;
credits += ocfs2_calc_extend_credits(inode->i_sb,
- &xv->xr_list,
- new_clusters -
- old_clusters);
+ &xv->xr_list);
if (value_size >= OCFS2_XATTR_ROOT_SIZE)
goto out;
}
@@ -3180,7 +3189,7 @@ meta_guess:
&xb->xb_attrs.xb_root.xt_list;
meta_add += ocfs2_extend_meta_needed(el);
credits += ocfs2_calc_extend_credits(inode->i_sb,
- el, 1);
+ el);
} else
credits += OCFS2_SUBALLOC_ALLOC + 1;
@@ -3199,8 +3208,15 @@ meta_guess:
clusters_add += 1;
}
} else {
- meta_add += 1;
credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
+ struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
+ meta_add += ocfs2_extend_meta_needed(el);
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ el);
+ } else {
+ meta_add += 1;
+ }
}
out:
if (clusters_need)
@@ -3613,6 +3629,7 @@ int ocfs2_xattr_set(struct inode *inode,
}
ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+ ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
ocfs2_commit_trans(osb, ctxt.handle);
@@ -4293,7 +4310,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
- ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
+ ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4637,7 +4654,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
* Even if !new_bucket_head, we're overwriting t_bucket. Thus,
* there's no need to read it.
*/
- ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
+ ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4803,7 +4820,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
* Even if !t_is_new, we're overwriting t_bucket. Thus,
* there's no need to read it.
*/
- ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
+ ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
if (ret)
goto out;
@@ -5475,6 +5492,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
if (ret)
mlog_errno(ret);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
out_commit:
ocfs2_commit_trans(osb, handle);
@@ -6216,8 +6234,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
le16_to_cpu(xv->xr_list.l_next_free_rec);
*credits += ocfs2_calc_extend_credits(sb,
- &def_xv.xv.xr_list,
- le32_to_cpu(xv->xr_clusters));
+ &def_xv.xv.xr_list);
/*
* If the value is a tree with depth > 1, We don't go deep
@@ -6782,7 +6799,7 @@ static int ocfs2_lock_reflink_xattr_rec_allocators(
metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
*credits += ocfs2_calc_extend_credits(osb->sb,
- xt_et->et_root_el, len);
+ xt_et->et_root_el);
if (metas.num_metas) {
ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
@@ -6830,7 +6847,7 @@ static int ocfs2_reflink_xattr_bucket(handle_t *handle,
break;
}
- ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
+ ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
if (ret) {
mlog_errno(ret);
break;
@@ -7190,10 +7207,12 @@ out:
*/
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr)
+ const struct qstr *qstr,
+ struct posix_acl *default_acl,
+ struct posix_acl *acl)
{
- int ret = 0;
struct buffer_head *dir_bh = NULL;
+ int ret = 0;
ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
if (ret) {
@@ -7207,9 +7226,10 @@ int ocfs2_init_security_and_acl(struct inode *dir,
goto leave;
}
- ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
- if (ret)
- mlog_errno(ret);
+ if (!ret && default_acl)
+ ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ if (!ret && acl)
+ ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
ocfs2_inode_unlock(dir, 0);
brelse(dir_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 19f134e896a..f10d5b93c36 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,8 +40,6 @@ struct ocfs2_security_xattr_info {
extern const struct xattr_handler ocfs2_xattr_user_handler;
extern const struct xattr_handler ocfs2_xattr_trusted_handler;
extern const struct xattr_handler ocfs2_xattr_security_handler;
-extern const struct xattr_handler ocfs2_xattr_acl_access_handler;
-extern const struct xattr_handler ocfs2_xattr_acl_default_handler;
extern const struct xattr_handler *ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
@@ -96,5 +94,7 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
bool preserve_security);
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr);
+ const struct qstr *qstr,
+ struct posix_acl *default_acl,
+ struct posix_acl *acl);
#endif /* OCFS2_XATTR_H */