diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 10:15:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 10:15:22 -0700 |
commit | e675349e2bdbfb661fa0d8ff2441b4cf48fb7e48 (patch) | |
tree | 7443e324c951f375945905dc436b012c98a00e05 | |
parent | ef38ff9d372d4fe69e415370939a0f1fb5783af1 (diff) | |
parent | 2309e9e040fe29469fb85a384636c455b62fe525 (diff) |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (64 commits)
ocfs2/net: Add debug interface to o2net
ocfs2: Only build ocfs2/dlm with the o2cb stack module
ocfs2/cluster: Get rid of arguments to the timeout routines
ocfs2: Put tree in MAINTAINERS
ocfs2: Use BUG_ON
ocfs2: Convert ocfs2 over to unlocked_ioctl
ocfs2: Improve rename locking
fs/ocfs2/aops.c: test for IS_ERR rather than 0
ocfs2: Add inode stealing for ocfs2_reserve_new_inode
ocfs2: Add ac_alloc_slot in ocfs2_alloc_context
ocfs2: Add a new parameter for ocfs2_reserve_suballoc_bits
ocfs2: Enable cross extent block merge.
ocfs2: Add support for cross extent block
ocfs2: Move /sys/o2cb to /sys/fs/o2cb
sysfs: Allow removal of symlinks in the sysfs root
ocfs2: Reconnect after idle time out.
ocfs2/dlm: Cleanup lockres print
ocfs2/dlm: Fix lockname in lockres print function
ocfs2/dlm: Move dlm_print_one_mle() from dlmmaster.c to dlmdebug.c
ocfs2/dlm: Dumps the purgelist into a debugfs file
...
47 files changed, 5800 insertions, 1042 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb new file mode 100644 index 00000000000..9c49d8e6c0c --- /dev/null +++ b/Documentation/ABI/obsolete/o2cb @@ -0,0 +1,11 @@ +What: /sys/o2cb symlink +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will + be removed when new versions of ocfs2-tools which know to look + in /sys/fs/o2cb are sufficiently prevalent. Don't code new + software to look here, it should try /sys/fs/o2cb instead. + See Documentation/ABI/stable/o2cb for more information on usage. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb new file mode 100644 index 00000000000..5eb1545e0b8 --- /dev/null +++ b/Documentation/ABI/stable/o2cb @@ -0,0 +1,10 @@ +What: /sys/fs/o2cb/ (was /sys/o2cb) +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: Ocfs2-tools looks at 'interface-revision' for versioning + information. Each logmask/ file controls a set of debug prints + and can be written into with the strings "allow", "deny", or + "off". Reading the file returns the current state. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2 new file mode 100644 index 00000000000..b7cc516a8a8 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ocfs2 @@ -0,0 +1,89 @@ +What: /sys/fs/ocfs2/ +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2 directory contains knobs used by the + ocfs2-tools to interact with the filesystem. + +What: /sys/fs/ocfs2/max_locking_protocol +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/max_locking_protocol file displays version + of ocfs2 locking supported by the filesystem. This version + covers how ocfs2 uses distributed locking between cluster + nodes. + + The protocol version has a major and minor number. Two + cluster nodes can interoperate if they have an identical + major number and an overlapping minor number - thus, + a node with version 1.10 can interoperate with a node + sporting version 1.8, as long as both use the 1.8 protocol. + + Reading from this file returns a single line, the major + number and minor number joined by a period, eg "1.10". + + This file is read-only. The value is compiled into the + driver. + +What: /sys/fs/ocfs2/loaded_cluster_plugins +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/loaded_cluster_plugins file describes + the available plugins to support ocfs2 cluster operation. + A cluster plugin is required to use ocfs2 in a cluster. + There are currently two available plugins: + + * 'o2cb' - The classic o2cb cluster stack that ocfs2 has + used since its inception. + * 'user' - A plugin supporting userspace cluster software + in conjunction with fs/dlm. + + Reading from this file returns the names of all loaded + plugins, one per line. + + This file is read-only. Its contents may change as + plugins are loaded or removed. + +What: /sys/fs/ocfs2/active_cluster_plugin +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/active_cluster_plugin displays which + cluster plugin is currently in use by the filesystem. + The active plugin will appear in the loaded_cluster_plugins + file as well. Only one plugin can be used at a time. + + Reading from this file returns the name of the active plugin + on a single line. + + This file is read-only. Which plugin is active depends on + the cluster stack in use. The contents may change + when all filesystems are unmounted and the cluster stack + is changed. + +What: /sys/fs/ocfs2/cluster_stack +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/cluster_stack file contains the name + of current ocfs2 cluster stack. This value is set by + userspace tools when bringing the cluster stack online. + + Cluster stack names are 4 characters in length. + + When the 'o2cb' cluster stack is used, the 'o2cb' cluster + plugin is active. All other cluster stacks use the 'user' + cluster plugin. + + Reading from this file returns the name of the current + cluster stack on a single line. + + Writing a new stack name to this file changes the current + cluster stack unless there are mounted ocfs2 filesystems. + If there are mounted filesystems, attempts to change the + stack return an error. + +Users: + ocfs2-tools <ocfs2-tools-devel@oss.oracle.com> diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 164c89394cf..4b70622a8a9 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -318,3 +318,13 @@ Why: Not used in-tree. The current out-of-tree users used it to code / infrastructure should be in the kernel and not in some out-of-tree driver. Who: Thomas Gleixner <tglx@linutronix.de> + +--------------------------- + +What: /sys/o2cb symlink +When: January 2010 +Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb + exists as a symlink for backwards compatibility for old versions of + ocfs2-tools. 2 years should be sufficient time to phase in new versions + which know to look in /sys/fs/o2cb. +Who: ocfs2-devel@oss.oracle.com diff --git a/MAINTAINERS b/MAINTAINERS index 3eceebb48c9..974ee8ddb12 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2952,6 +2952,7 @@ P: Joel Becker M: joel.becker@oracle.com L: ocfs2-devel@oss.oracle.com W: http://oss.oracle.com/projects/ocfs2/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git S: Supported OMNIKEY CARDMAN 4000 DRIVER diff --git a/fs/Kconfig b/fs/Kconfig index c509123bea4..028ae38ecc5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -444,6 +444,32 @@ config OCFS2_FS For more information on OCFS2, see the file <file:Documentation/filesystems/ocfs2.txt>. +config OCFS2_FS_O2CB + tristate "O2CB Kernelspace Clustering" + depends on OCFS2_FS + default y + help + OCFS2 includes a simple kernelspace clustering package, the OCFS2 + Cluster Base. It only requires a very small userspace component + to configure it. This comes with the standard ocfs2-tools package. + O2CB is limited to maintaining a cluster for OCFS2 file systems. + It cannot manage any other cluster applications. + + It is always safe to say Y here, as the clustering method is + run-time selectable. + +config OCFS2_FS_USERSPACE_CLUSTER + tristate "OCFS2 Userspace Clustering" + depends on OCFS2_FS && DLM + default y + help + This option will allow OCFS2 to use userspace clustering services + in conjunction with the DLM in fs/dlm. If you are using a + userspace cluster manager, say Y here. + + It is safe to say Y, as the clustering method is run-time + selectable. + config OCFS2_DEBUG_MASKLOG bool "OCFS2 logging support" depends on OCFS2_FS diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 4d4ce48bb42..f6956de56fd 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2 EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES -obj-$(CONFIG_OCFS2_FS) += ocfs2.o +obj-$(CONFIG_OCFS2_FS) += \ + ocfs2.o \ + ocfs2_stackglue.o + +obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o +obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o ocfs2-objs := \ alloc.o \ @@ -31,5 +36,10 @@ ocfs2-objs := \ uptodate.o \ ver.o +ocfs2_stackglue-objs := stackglue.o +ocfs2_stack_o2cb-objs := stack_o2cb.o +ocfs2_stack_user-objs := stack_user.o + +# cluster/ is always needed when OCFS2_FS for masklog support obj-$(CONFIG_OCFS2_FS) += cluster/ -obj-$(CONFIG_OCFS2_FS) += dlm/ +obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 447206eb5c2..41f84c92094 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, BUG_ON(!next_free); /* The tree code before us didn't allow enough room in the leaf. */ - if (el->l_next_free_rec == el->l_count && !has_empty) - BUG(); + BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); /* * The easiest way to approach this is to just remove the @@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, * - When our insert into the right path leaf is at the leftmost edge * and requires an update of the path immediately to it's left. This * can occur at the end of some types of rotation and appending inserts. + * - When we've adjusted the last extent record in the left path leaf and the + * 1st extent record in the right path leaf during cross extent block merge. */ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, struct ocfs2_path *left_path, @@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, } } +static int ocfs2_get_right_path(struct inode *inode, + struct ocfs2_path *left_path, + struct ocfs2_path **ret_right_path) +{ + int ret; + u32 right_cpos; + struct ocfs2_path *right_path = NULL; + struct ocfs2_extent_list *left_el; + + *ret_right_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(left_path->p_tree_depth == 0); + + left_el = path_leaf_el(left_path); + BUG_ON(left_el->l_next_free_rec != left_el->l_count); + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the rightmost leaf. */ + BUG_ON(right_cpos == 0); + + right_path = ocfs2_new_path(path_root_bh(left_path), + path_root_el(left_path)); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, right_path, right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_right_path = right_path; +out: + if (ret) + ocfs2_free_path(right_path); + return ret; +} + /* * Remove split_rec clusters from the record at index and merge them - * onto the beginning of the record at index + 1. + * onto the beginning of the record "next" to it. + * For index < l_count - 1, the next means the extent rec at index + 1. + * For index == l_count - 1, the "next" means the 1st extent rec of the + * next extent block. */ -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, - handle_t *handle, - struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) +static int ocfs2_merge_rec_right(struct inode *inode, + struct ocfs2_path *left_path, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + int index) { - int ret; + int ret, next_free, i; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *right_el; + struct ocfs2_path *right_path = NULL; + int subtree_index = 0; + struct ocfs2_extent_list *el = path_leaf_el(left_path); + struct buffer_head *bh = path_leaf_bh(left_path); + struct buffer_head *root_bh = NULL; BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); - left_rec = &el->l_recs[index]; - right_rec = &el->l_recs[index + 1]; + + if (index == le16_to_cpu(el->l_next_free_rec - 1) && + le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_right_path(inode, left_path, &right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + right_el = path_leaf_el(right_path); + next_free = le16_to_cpu(right_el->l_next_free_rec); + BUG_ON(next_free <= 0); + right_rec = &right_el->l_recs[0]; + if (ocfs2_is_empty_extent(right_rec)) { + BUG_ON(le16_to_cpu(next_free) <= 1); + right_rec = &right_el->l_recs[1]; + } + + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(right_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + } else { + BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); + right_rec = &el->l_recs[index + 1]; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (right_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); + if (ret) + mlog_errno(ret); + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } +out: + if (right_path) + ocfs2_free_path(right_path); + return ret; +} + +static int ocfs2_get_left_path(struct inode *inode, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) +{ + int ret; + u32 left_cpos; + struct ocfs2_path *left_path = NULL; + + *ret_left_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(right_path->p_tree_depth == 0); + + ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + right_path, &left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the leftmost leaf. */ + BUG_ON(left_cpos == 0); + + left_path = ocfs2_new_path(path_root_bh(right_path), + path_root_el(right_path)); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, left_path, left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_left_path = left_path; out: + if (ret) + ocfs2_free_path(left_path); return ret; } /* * Remove split_rec clusters from the record at index and merge them - * onto the tail of the record at index - 1. + * onto the tail of the record "before" it. + * For index > 0, the "before" means the extent rec at index - 1. + * + * For index == 0, the "before" means the last record of the previous + * extent block. And there is also a situation that we may need to + * remove the rightmost leaf extent block in the right_path and change + * the right path to indicate the new rightmost path. */ -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, +static int ocfs2_merge_rec_left(struct inode *inode, + struct ocfs2_path *right_path, handle_t *handle, struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) + struct ocfs2_cached_dealloc_ctxt *dealloc, + int index) { - int ret, has_empty_extent = 0; + int ret, i, subtree_index = 0, has_empty_extent = 0; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *el = path_leaf_el(right_path); + struct buffer_head *bh = path_leaf_bh(right_path); + struct buffer_head *root_bh = NULL; + struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_list *left_el; - BUG_ON(index <= 0); + BUG_ON(index < 0); - left_rec = &el->l_recs[index - 1]; right_rec = &el->l_recs[index]; - if (ocfs2_is_empty_extent(&el->l_recs[0])) - has_empty_extent = 1; + if (index == 0) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_left_path(inode, right_path, &left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + left_el = path_leaf_el(left_path); + BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != + le16_to_cpu(left_el->l_count)); + + left_rec = &left_el->l_recs[ + le16_to_cpu(left_el->l_next_free_rec) - 1]; + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(split_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + } else { + left_rec = &el->l_recs[index - 1]; + if (ocfs2_is_empty_extent(&el->l_recs[0])) + has_empty_extent = 1; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, *left_rec = *split_rec; has_empty_extent = 0; - } else { + } else le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); - } le32_add_cpu(&right_rec->e_cpos, split_clusters); le64_add_cpu(&right_rec->e_blkno, @@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (left_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + if (ret) + mlog_errno(ret); + + /* + * In the situation that the right_rec is empty and the extent + * block is empty also, ocfs2_complete_edge_insert can't handle + * it and we need to delete the right extent block. + */ + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && + le16_to_cpu(el->l_next_free_rec) == 1) { + + ret = ocfs2_remove_rightmost_path(inode, handle, + right_path, dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* Now the rightmost extent block has been deleted. + * So we use the new rightmost path. + */ + ocfs2_mv_path(right_path, left_path); + left_path = NULL; + } else + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (left_path) + ocfs2_free_path(left_path); return ret; } static int ocfs2_try_to_merge_extent(struct inode *inode, handle_t *handle, - struct ocfs2_path *left_path, + struct ocfs2_path *path, int split_index, struct ocfs2_extent_rec *split_rec, struct ocfs2_cached_dealloc_ctxt *dealloc, @@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, { int ret = 0; - struct ocfs2_extent_list *el = path_leaf_el(left_path); + struct ocfs2_extent_list *el = path_leaf_el(path); struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; BUG_ON(ctxt->c_contig_type == CONTIG_NONE); @@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * extents - having more than one in a leaf is * illegal. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); @@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Left-right contig implies this. */ BUG_ON(!ctxt->c_split_covers_rec); - BUG_ON(split_index == 0); /* * Since the leftright insert always covers the entire @@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Since the adding of an empty extent shifts * everything back to the right, there's no need to * update split_index here. + * + * When the split_index is zero, we need to merge it to the + * prevoius extent block. It is more efficient and easier + * if we do merge_right first and merge_left later. */ - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), - handle, split_rec, el, split_index); + ret = ocfs2_merge_rec_right(inode, path, + handle, split_rec, + split_index); if (ret) { mlog_errno(ret); goto out; @@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - /* - * The left merge left us with an empty extent, remove - * it. - */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); + /* The merge left us with an empty extent, remove it. */ + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); goto out; } - split_index--; + rec = &el->l_recs[split_index]; /* * Note that we don't pass split_rec here on purpose - - * we've merged it into the left side. + * we've merged it into the rec already. */ - ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), - handle, rec, el, split_index); + ret = ocfs2_merge_rec_left(inode, path, + handle, rec, + dealloc, + split_index); + if (ret) { mlog_err |