aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-04-18 10:15:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-18 10:15:22 -0700
commite675349e2bdbfb661fa0d8ff2441b4cf48fb7e48 (patch)
tree7443e324c951f375945905dc436b012c98a00e05
parentef38ff9d372d4fe69e415370939a0f1fb5783af1 (diff)
parent2309e9e040fe29469fb85a384636c455b62fe525 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (64 commits) ocfs2/net: Add debug interface to o2net ocfs2: Only build ocfs2/dlm with the o2cb stack module ocfs2/cluster: Get rid of arguments to the timeout routines ocfs2: Put tree in MAINTAINERS ocfs2: Use BUG_ON ocfs2: Convert ocfs2 over to unlocked_ioctl ocfs2: Improve rename locking fs/ocfs2/aops.c: test for IS_ERR rather than 0 ocfs2: Add inode stealing for ocfs2_reserve_new_inode ocfs2: Add ac_alloc_slot in ocfs2_alloc_context ocfs2: Add a new parameter for ocfs2_reserve_suballoc_bits ocfs2: Enable cross extent block merge. ocfs2: Add support for cross extent block ocfs2: Move /sys/o2cb to /sys/fs/o2cb sysfs: Allow removal of symlinks in the sysfs root ocfs2: Reconnect after idle time out. ocfs2/dlm: Cleanup lockres print ocfs2/dlm: Fix lockname in lockres print function ocfs2/dlm: Move dlm_print_one_mle() from dlmmaster.c to dlmdebug.c ocfs2/dlm: Dumps the purgelist into a debugfs file ...
-rw-r--r--Documentation/ABI/obsolete/o2cb11
-rw-r--r--Documentation/ABI/stable/o2cb10
-rw-r--r--Documentation/ABI/testing/sysfs-ocfs289
-rw-r--r--Documentation/feature-removal-schedule.txt10
-rw-r--r--MAINTAINERS1
-rw-r--r--fs/Kconfig26
-rw-r--r--fs/ocfs2/Makefile14
-rw-r--r--fs/ocfs2/alloc.c465
-rw-r--r--fs/ocfs2/aops.c6
-rw-r--r--fs/ocfs2/cluster/Makefile2
-rw-r--r--fs/ocfs2/cluster/netdebug.c441
-rw-r--r--fs/ocfs2/cluster/nodemanager.c5
-rw-r--r--fs/ocfs2/cluster/sys.c9
-rw-r--r--fs/ocfs2/cluster/tcp.c164
-rw-r--r--fs/ocfs2/cluster/tcp.h32
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h26
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h49
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c911
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h86
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c70
-rw-r--r--fs/ocfs2/dlm/dlmlock.c22
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c200
-rw-r--r--fs/ocfs2/dlmglue.c645
-rw-r--r--fs/ocfs2/dlmglue.h5
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/heartbeat.c184
-rw-r--r--fs/ocfs2/heartbeat.h17
-rw-r--r--fs/ocfs2/ioctl.c13
-rw-r--r--fs/ocfs2/ioctl.h3
-rw-r--r--fs/ocfs2/journal.c211
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/ocfs2.h77
-rw-r--r--fs/ocfs2/ocfs2_fs.h79
-rw-r--r--fs/ocfs2/ocfs2_lockid.h2
-rw-r--r--fs/ocfs2/slot_map.c454
-rw-r--r--fs/ocfs2/slot_map.h32
-rw-r--r--fs/ocfs2/stack_o2cb.c420
-rw-r--r--fs/ocfs2/stack_user.c883
-rw-r--r--fs/ocfs2/stackglue.c568
-rw-r--r--fs/ocfs2/stackglue.h261
-rw-r--r--fs/ocfs2/suballoc.c103
-rw-r--r--fs/ocfs2/suballoc.h1
-rw-r--r--fs/ocfs2/super.c208
-rw-r--r--fs/sysfs/symlink.c9
47 files changed, 5800 insertions, 1042 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb
new file mode 100644
index 00000000000..9c49d8e6c0c
--- /dev/null
+++ b/Documentation/ABI/obsolete/o2cb
@@ -0,0 +1,11 @@
+What: /sys/o2cb symlink
+Date: Dec 2005
+KernelVersion: 2.6.16
+Contact: ocfs2-devel@oss.oracle.com
+Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will
+ be removed when new versions of ocfs2-tools which know to look
+ in /sys/fs/o2cb are sufficiently prevalent. Don't code new
+ software to look here; it should use /sys/fs/o2cb instead.
+ See Documentation/ABI/stable/o2cb for more information on usage.
+Users: ocfs2-tools. It's sufficient to mail proposed changes to
+ ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb
new file mode 100644
index 00000000000..5eb1545e0b8
--- /dev/null
+++ b/Documentation/ABI/stable/o2cb
@@ -0,0 +1,10 @@
+What: /sys/fs/o2cb/ (was /sys/o2cb)
+Date: Dec 2005
+KernelVersion: 2.6.16
+Contact: ocfs2-devel@oss.oracle.com
+Description: Ocfs2-tools looks at 'interface-revision' for versioning
+ information. Each logmask/ file controls a set of debug prints
+ and can be written into with the strings "allow", "deny", or
+ "off". Reading the file returns the current state.
+Users: ocfs2-tools. It's sufficient to mail proposed changes to
+ ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2
new file mode 100644
index 00000000000..b7cc516a8a8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ocfs2
@@ -0,0 +1,89 @@
+What: /sys/fs/ocfs2/
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2 directory contains knobs used by the
+ ocfs2-tools to interact with the filesystem.
+
+What: /sys/fs/ocfs2/max_locking_protocol
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/max_locking_protocol file displays the version
+ of the ocfs2 locking protocol supported by the filesystem. This version
+ covers how ocfs2 uses distributed locking between cluster
+ nodes.
+
+ The protocol version has a major and minor number. Two
+ cluster nodes can interoperate if they have an identical
+ major number and an overlapping minor number - thus,
+ a node with version 1.10 can interoperate with a node
+ sporting version 1.8, as long as both use the 1.8 protocol.
+
+ Reading from this file returns a single line, the major
+ number and minor number joined by a period, e.g. "1.10".
+
+ This file is read-only. The value is compiled into the
+ driver.
+
+What: /sys/fs/ocfs2/loaded_cluster_plugins
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/loaded_cluster_plugins file describes
+ the available plugins to support ocfs2 cluster operation.
+ A cluster plugin is required to use ocfs2 in a cluster.
+ There are currently two available plugins:
+
+ * 'o2cb' - The classic o2cb cluster stack that ocfs2 has
+ used since its inception.
+ * 'user' - A plugin supporting userspace cluster software
+ in conjunction with fs/dlm.
+
+ Reading from this file returns the names of all loaded
+ plugins, one per line.
+
+ This file is read-only. Its contents may change as
+ plugins are loaded or removed.
+
+What: /sys/fs/ocfs2/active_cluster_plugin
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/active_cluster_plugin file displays which
+ cluster plugin is currently in use by the filesystem.
+ The active plugin will appear in the loaded_cluster_plugins
+ file as well. Only one plugin can be used at a time.
+
+ Reading from this file returns the name of the active plugin
+ on a single line.
+
+ This file is read-only. Which plugin is active depends on
+ the cluster stack in use. The contents may change
+ when all filesystems are unmounted and the cluster stack
+ is changed.
+
+What: /sys/fs/ocfs2/cluster_stack
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/cluster_stack file contains the name
+ of the current ocfs2 cluster stack. This value is set by
+ userspace tools when bringing the cluster stack online.
+
+ Cluster stack names are 4 characters in length.
+
+ When the 'o2cb' cluster stack is used, the 'o2cb' cluster
+ plugin is active. All other cluster stacks use the 'user'
+ cluster plugin.
+
+ Reading from this file returns the name of the current
+ cluster stack on a single line.
+
+ Writing a new stack name to this file changes the current
+ cluster stack unless there are mounted ocfs2 filesystems.
+ If there are mounted filesystems, attempts to change the
+ stack return an error.
+
+Users:
+ ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 164c89394cf..4b70622a8a9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -318,3 +318,13 @@ Why: Not used in-tree. The current out-of-tree users used it to
code / infrastructure should be in the kernel and not in some
out-of-tree driver.
Who: Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
+What: /sys/o2cb symlink
+When: January 2010
+Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb
+ exists as a symlink for backwards compatibility for old versions of
+ ocfs2-tools. 2 years should be sufficient time to phase in new versions
+ which know to look in /sys/fs/o2cb.
+Who: ocfs2-devel@oss.oracle.com
diff --git a/MAINTAINERS b/MAINTAINERS
index 3eceebb48c9..974ee8ddb12 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2952,6 +2952,7 @@ P: Joel Becker
M: joel.becker@oracle.com
L: ocfs2-devel@oss.oracle.com
W: http://oss.oracle.com/projects/ocfs2/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git
S: Supported
OMNIKEY CARDMAN 4000 DRIVER
diff --git a/fs/Kconfig b/fs/Kconfig
index c509123bea4..028ae38ecc5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -444,6 +444,32 @@ config OCFS2_FS
For more information on OCFS2, see the file
<file:Documentation/filesystems/ocfs2.txt>.
+config OCFS2_FS_O2CB
+ tristate "O2CB Kernelspace Clustering"
+ depends on OCFS2_FS
+ default y
+ help
+ OCFS2 includes a simple kernelspace clustering package, the OCFS2
+ Cluster Base. It only requires a very small userspace component
+ to configure it. This comes with the standard ocfs2-tools package.
+ O2CB is limited to maintaining a cluster for OCFS2 file systems.
+ It cannot manage any other cluster applications.
+
+ It is always safe to say Y here, as the clustering method is
+ run-time selectable.
+
+config OCFS2_FS_USERSPACE_CLUSTER
+ tristate "OCFS2 Userspace Clustering"
+ depends on OCFS2_FS && DLM
+ default y
+ help
+ This option will allow OCFS2 to use userspace clustering services
+ in conjunction with the DLM in fs/dlm. If you are using a
+ userspace cluster manager, say Y here.
+
+ It is safe to say Y, as the clustering method is run-time
+ selectable.
+
config OCFS2_DEBUG_MASKLOG
bool "OCFS2 logging support"
depends on OCFS2_FS
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 4d4ce48bb42..f6956de56fd 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2
EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES
-obj-$(CONFIG_OCFS2_FS) += ocfs2.o
+obj-$(CONFIG_OCFS2_FS) += \
+ ocfs2.o \
+ ocfs2_stackglue.o
+
+obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o
+obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
ocfs2-objs := \
alloc.o \
@@ -31,5 +36,10 @@ ocfs2-objs := \
uptodate.o \
ver.o
+ocfs2_stackglue-objs := stackglue.o
+ocfs2_stack_o2cb-objs := stack_o2cb.o
+ocfs2_stack_user-objs := stack_user.o
+
+# cluster/ is always needed when OCFS2_FS is enabled, for masklog support
obj-$(CONFIG_OCFS2_FS) += cluster/
-obj-$(CONFIG_OCFS2_FS) += dlm/
+obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 447206eb5c2..41f84c92094 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
BUG_ON(!next_free);
/* The tree code before us didn't allow enough room in the leaf. */
- if (el->l_next_free_rec == el->l_count && !has_empty)
- BUG();
+ BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
/*
* The easiest way to approach this is to just remove the
@@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
* - When our insert into the right path leaf is at the leftmost edge
* and requires an update of the path immediately to it's left. This
* can occur at the end of some types of rotation and appending inserts.
+ * - When we've adjusted the last extent record in the left path leaf and the
+ * 1st extent record in the right path leaf during cross extent block merge.
*/
static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
struct ocfs2_path *left_path,
@@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
}
}
+static int ocfs2_get_right_path(struct inode *inode,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path **ret_right_path)
+{
+ int ret;
+ u32 right_cpos;
+ struct ocfs2_path *right_path = NULL;
+ struct ocfs2_extent_list *left_el;
+
+ *ret_right_path = NULL;
+
+ /* This function shouldn't be called for non-trees. */
+ BUG_ON(left_path->p_tree_depth == 0);
+
+ left_el = path_leaf_el(left_path);
+ BUG_ON(left_el->l_next_free_rec != left_el->l_count);
+
+ ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+ &right_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* This function shouldn't be called for the rightmost leaf. */
+ BUG_ON(right_cpos == 0);
+
+ right_path = ocfs2_new_path(path_root_bh(left_path),
+ path_root_el(left_path));
+ if (!right_path) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_path(inode, right_path, right_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *ret_right_path = right_path;
+out:
+ if (ret)
+ ocfs2_free_path(right_path);
+ return ret;
+}
+
/*
* Remove split_rec clusters from the record at index and merge them
- * onto the beginning of the record at index + 1.
+ * onto the beginning of the record "next" to it.
+ * For index < l_count - 1, the next means the extent rec at index + 1.
+ * For index == l_count - 1, the "next" means the 1st extent rec of the
+ * next extent block.
*/
-static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
- handle_t *handle,
- struct ocfs2_extent_rec *split_rec,
- struct ocfs2_extent_list *el, int index)
+static int ocfs2_merge_rec_right(struct inode *inode,
+ struct ocfs2_path *left_path,
+ handle_t *handle,
+ struct ocfs2_extent_rec *split_rec,
+ int index)
{
- int ret;
+ int ret, next_free, i;
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
struct ocfs2_extent_rec *left_rec;
struct ocfs2_extent_rec *right_rec;
+ struct ocfs2_extent_list *right_el;
+ struct ocfs2_path *right_path = NULL;
+ int subtree_index = 0;
+ struct ocfs2_extent_list *el = path_leaf_el(left_path);
+ struct buffer_head *bh = path_leaf_bh(left_path);
+ struct buffer_head *root_bh = NULL;
BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
-
left_rec = &el->l_recs[index];
- right_rec = &el->l_recs[index + 1];
+
+ if (index == le16_to_cpu(el->l_next_free_rec - 1) &&
+ le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
+ /* we meet with a cross extent block merge. */
+ ret = ocfs2_get_right_path(inode, left_path, &right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ right_el = path_leaf_el(right_path);
+ next_free = le16_to_cpu(right_el->l_next_free_rec);
+ BUG_ON(next_free <= 0);
+ right_rec = &right_el->l_recs[0];
+ if (ocfs2_is_empty_extent(right_rec)) {
+ BUG_ON(le16_to_cpu(next_free) <= 1);
+ right_rec = &right_el->l_recs[1];
+ }
+
+ BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+ le16_to_cpu(left_rec->e_leaf_clusters) !=
+ le32_to_cpu(right_rec->e_cpos));
+
+ subtree_index = ocfs2_find_subtree_root(inode,
+ left_path, right_path);
+
+ ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+ handle->h_buffer_credits,
+ right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ root_bh = left_path->p_node[subtree_index].bh;
+ BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+ ret = ocfs2_journal_access(handle, inode, root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ for (i = subtree_index + 1;
+ i < path_num_items(right_path); i++) {
+ ret = ocfs2_journal_access(handle, inode,
+ right_path->p_node[i].bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access(handle, inode,
+ left_path->p_node[i].bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ } else {
+ BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
+ right_rec = &el->l_recs[index + 1];
+ }
ret = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
if (ret)
mlog_errno(ret);
+ if (right_path) {
+ ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
+ if (ret)
+ mlog_errno(ret);
+
+ ocfs2_complete_edge_insert(inode, handle, left_path,
+ right_path, subtree_index);
+ }
+out:
+ if (right_path)
+ ocfs2_free_path(right_path);
+ return ret;
+}
+
+static int ocfs2_get_left_path(struct inode *inode,
+ struct ocfs2_path *right_path,
+ struct ocfs2_path **ret_left_path)
+{
+ int ret;
+ u32 left_cpos;
+ struct ocfs2_path *left_path = NULL;
+
+ *ret_left_path = NULL;
+
+ /* This function shouldn't be called for non-trees. */
+ BUG_ON(right_path->p_tree_depth == 0);
+
+ ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+ right_path, &left_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* This function shouldn't be called for the leftmost leaf. */
+ BUG_ON(left_cpos == 0);
+
+ left_path = ocfs2_new_path(path_root_bh(right_path),
+ path_root_el(right_path));
+ if (!left_path) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_path(inode, left_path, left_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *ret_left_path = left_path;
out:
+ if (ret)
+ ocfs2_free_path(left_path);
return ret;
}
/*
* Remove split_rec clusters from the record at index and merge them
- * onto the tail of the record at index - 1.
+ * onto the tail of the record "before" it.
+ * For index > 0, the "before" means the extent rec at index - 1.
+ *
+ * For index == 0, the "before" means the last record of the previous
+ * extent block. And there is also a situation that we may need to
+ * remove the rightmost leaf extent block in the right_path and change
+ * the right path to indicate the new rightmost path.
*/
-static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
+static int ocfs2_merge_rec_left(struct inode *inode,
+ struct ocfs2_path *right_path,
handle_t *handle,
struct ocfs2_extent_rec *split_rec,
- struct ocfs2_extent_list *el, int index)
+ struct ocfs2_cached_dealloc_ctxt *dealloc,
+ int index)
{
- int ret, has_empty_extent = 0;
+ int ret, i, subtree_index = 0, has_empty_extent = 0;
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
struct ocfs2_extent_rec *left_rec;
struct ocfs2_extent_rec *right_rec;
+ struct ocfs2_extent_list *el = path_leaf_el(right_path);
+ struct buffer_head *bh = path_leaf_bh(right_path);
+ struct buffer_head *root_bh = NULL;
+ struct ocfs2_path *left_path = NULL;
+ struct ocfs2_extent_list *left_el;
- BUG_ON(index <= 0);
+ BUG_ON(index < 0);
- left_rec = &el->l_recs[index - 1];
right_rec = &el->l_recs[index];
- if (ocfs2_is_empty_extent(&el->l_recs[0]))
- has_empty_extent = 1;
+ if (index == 0) {
+ /* we meet with a cross extent block merge. */
+ ret = ocfs2_get_left_path(inode, right_path, &left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ left_el = path_leaf_el(left_path);
+ BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
+ le16_to_cpu(left_el->l_count));
+
+ left_rec = &left_el->l_recs[
+ le16_to_cpu(left_el->l_next_free_rec) - 1];
+ BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+ le16_to_cpu(left_rec->e_leaf_clusters) !=
+ le32_to_cpu(split_rec->e_cpos));
+
+ subtree_index = ocfs2_find_subtree_root(inode,
+ left_path, right_path);
+
+ ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+ handle->h_buffer_credits,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ root_bh = left_path->p_node[subtree_index].bh;
+ BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+ ret = ocfs2_journal_access(handle, inode, root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ for (i = subtree_index + 1;
+ i < path_num_items(right_path); i++) {
+ ret = ocfs2_journal_access(handle, inode,
+ right_path->p_node[i].bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access(handle, inode,
+ left_path->p_node[i].bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ } else {
+ left_rec = &el->l_recs[index - 1];
+ if (ocfs2_is_empty_extent(&el->l_recs[0]))
+ has_empty_extent = 1;
+ }
ret = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
*left_rec = *split_rec;
has_empty_extent = 0;
- } else {
+ } else
le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
- }
le32_add_cpu(&right_rec->e_cpos, split_clusters);
le64_add_cpu(&right_rec->e_blkno,
@@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
if (ret)
mlog_errno(ret);
+ if (left_path) {
+ ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
+ if (ret)
+ mlog_errno(ret);
+
+ /*
+ * In the situation that the right_rec is empty and the extent
+ * block is empty also, ocfs2_complete_edge_insert can't handle
+ * it and we need to delete the right extent block.
+ */
+ if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
+ le16_to_cpu(el->l_next_free_rec) == 1) {
+
+ ret = ocfs2_remove_rightmost_path(inode, handle,
+ right_path, dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Now the rightmost extent block has been deleted.
+ * So we use the new rightmost path.
+ */
+ ocfs2_mv_path(right_path, left_path);
+ left_path = NULL;
+ } else
+ ocfs2_complete_edge_insert(inode, handle, left_path,
+ right_path, subtree_index);
+ }
out:
+ if (left_path)
+ ocfs2_free_path(left_path);
return ret;
}
static int ocfs2_try_to_merge_extent(struct inode *inode,
handle_t *handle,
- struct ocfs2_path *left_path,
+ struct ocfs2_path *path,
int split_index,
struct ocfs2_extent_rec *split_rec,
struct ocfs2_cached_dealloc_ctxt *dealloc,
@@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
{
int ret = 0;
- struct ocfs2_extent_list *el = path_leaf_el(left_path);
+ struct ocfs2_extent_list *el = path_leaf_el(path);
struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
@@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* extents - having more than one in a leaf is
* illegal.
*/
- ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+ ret = ocfs2_rotate_tree_left(inode, handle, path,
dealloc);
if (ret) {
mlog_errno(ret);
@@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* Left-right contig implies this.
*/
BUG_ON(!ctxt->c_split_covers_rec);
- BUG_ON(split_index == 0);
/*
* Since the leftright insert always covers the entire
@@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* Since the adding of an empty extent shifts
* everything back to the right, there's no need to
* update split_index here.
+ *
+ * When the split_index is zero, we need to merge it to the
+ * previous extent block. It is more efficient and easier
+ * if we do merge_right first and merge_left later.
*/
- ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
- handle, split_rec, el, split_index);
+ ret = ocfs2_merge_rec_right(inode, path,
+ handle, split_rec,
+ split_index);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
*/
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
- /*
- * The left merge left us with an empty extent, remove
- * it.
- */
- ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
+ /* The merge left us with an empty extent, remove it. */
+ ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
if (ret) {
mlog_errno(ret);
goto out;
}
- split_index--;
+
rec = &el->l_recs[split_index];
/*
* Note that we don't pass split_rec here on purpose -
- * we've merged it into the left side.
+ * we've merged it into the rec already.
*/
- ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
- handle, rec, el, split_index);
+ ret = ocfs2_merge_rec_left(inode, path,
+ handle, rec,
+ dealloc,
+ split_index);
+
if (ret) {
mlog_err