aboutsummaryrefslogtreecommitdiff
path: root/fs/ocfs2/alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r--fs/ocfs2/alloc.c4003
1 files changed, 2525 insertions, 1478 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 447206eb5c2..9d8fcf2f3b9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,14 +28,16 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/quotaops.h>
+#include <linux/blkdev.h>
-#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "alloc.h"
#include "aops.h"
+#include "blockcheck.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
@@ -46,44 +48,536 @@
#include "file.h"
#include "super.h"
#include "uptodate.h"
+#include "xattr.h"
+#include "refcounttree.h"
+#include "ocfs2_trace.h"
#include "buffer_head_io.h"
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
-static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
- struct ocfs2_extent_block *eb);
+enum ocfs2_contig_type {
+ CONTIG_NONE = 0,
+ CONTIG_LEFT,
+ CONTIG_RIGHT,
+ CONTIG_LEFTRIGHT,
+};
+static enum ocfs2_contig_type
+ ocfs2_extent_rec_contig(struct super_block *sb,
+ struct ocfs2_extent_rec *ext,
+ struct ocfs2_extent_rec *insert_rec);
/*
- * Structures which describe a path through a btree, and functions to
- * manipulate them.
+ * Operations for a specific extent tree type.
*
- * The idea here is to be as generic as possible with the tree
- * manipulation code.
+ * To implement an on-disk btree (extent tree) type in ocfs2, add
+ * an ocfs2_extent_tree_operations structure and the matching
+ * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it
+ * for the allocation portion of the extent tree.
+ */
+struct ocfs2_extent_tree_operations {
+ /*
+ * last_eb_blk is the block number of the right most leaf extent
+ * block. Most on-disk structures containing an extent tree store
+ * this value for fast access. The ->eo_set_last_eb_blk() and
+ * ->eo_get_last_eb_blk() operations access this value. They are
+ * both required.
+ */
+ void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
+ u64 blkno);
+ u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
+
+ /*
+ * The on-disk structure usually keeps track of how many total
+ * clusters are stored in this extent tree. This function updates
+ * that value. new_clusters is the delta, and must be
+ * added to the total. Required.
+ */
+ void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
+ u32 new_clusters);
+
+ /*
+ * If this extent tree is supported by an extent map, insert
+ * a record into the map.
+ */
+ void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec);
+
+ /*
+ * If this extent tree is supported by an extent map, truncate the
+ * map to clusters,
+ */
+ void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
+ u32 clusters);
+
+ /*
+ * If ->eo_insert_check() exists, it is called before rec is
+ * inserted into the extent tree. It is optional.
+ */
+ int (*eo_insert_check)(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec);
+ int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
+
+ /*
+ * --------------------------------------------------------------
+ * The remaining are internal to ocfs2_extent_tree and don't have
+ * accessor functions
+ */
+
+ /*
+ * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
+ * It is required.
+ */
+ void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
+
+ /*
+ * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
+ * it exists. If it does not, et->et_max_leaf_clusters is set
+ * to 0 (unlimited). Optional.
+ */
+ void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
+
+ /*
+ * ->eo_extent_contig test whether the 2 ocfs2_extent_rec
+ * are contiguous or not. Optional. Don't need to set it if use
+ * ocfs2_extent_rec as the tree leaf.
+ */
+ enum ocfs2_contig_type
+ (*eo_extent_contig)(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *ext,
+ struct ocfs2_extent_rec *insert_rec);
+};
+
+
+/*
+ * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
+ * in the methods.
*/
-struct ocfs2_path_item {
- struct buffer_head *bh;
- struct ocfs2_extent_list *el;
+static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
+static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno);
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters);
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec);
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+ u32 clusters);
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec);
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
+static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
+static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_dinode_update_clusters,
+ .eo_extent_map_insert = ocfs2_dinode_extent_map_insert,
+ .eo_extent_map_truncate = ocfs2_dinode_extent_map_truncate,
+ .eo_insert_check = ocfs2_dinode_insert_check,
+ .eo_sanity_check = ocfs2_dinode_sanity_check,
+ .eo_fill_root_el = ocfs2_dinode_fill_root_el,
+};
+
+static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_dinode *di = et->et_object;
+
+ BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+ di->i_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dinode *di = et->et_object;
+
+ BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+ return le64_to_cpu(di->i_last_eb_blk);
+}
+
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
+ struct ocfs2_dinode *di = et->et_object;
+
+ le32_add_cpu(&di->i_clusters, clusters);
+ spin_lock(&oi->ip_lock);
+ oi->ip_clusters = le32_to_cpu(di->i_clusters);
+ spin_unlock(&oi->ip_lock);
+}
+
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec)
+{
+ struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+ ocfs2_extent_map_insert_rec(inode, rec);
+}
+
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+ ocfs2_extent_map_trunc(inode, clusters);
+}
+
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec)
+{
+ struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
+ struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
+
+ BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
+ mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
+ (oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
+ "Device %s, asking for sparse allocation: inode %llu, "
+ "cpos %u, clusters %u\n",
+ osb->dev_str,
+ (unsigned long long)oi->ip_blkno,
+ rec->e_cpos, oi->ip_clusters);
+
+ return 0;
+}
+
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dinode *di = et->et_object;
+
+ BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ return 0;
+}
+
+static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dinode *di = et->et_object;
+
+ et->et_root_el = &di->id2.i_list;
+}
+
+
+static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_xattr_value_buf *vb = et->et_object;
+
+ et->et_root_el = &vb->vb_xv->xr_list;
+}
+
+static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_xattr_value_buf *vb = et->et_object;
+
+ vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_xattr_value_buf *vb = et->et_object;
+
+ return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
+}
+
+static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_xattr_value_buf *vb = et->et_object;
+
+ le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_xattr_value_update_clusters,
+ .eo_fill_root_el = ocfs2_xattr_value_fill_root_el,
};
-#define OCFS2_MAX_PATH_DEPTH 5
+static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_xattr_block *xb = et->et_object;
+
+ et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
+}
+
+static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
+{
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+ et->et_max_leaf_clusters =
+ ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
+}
+
+static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_xattr_block *xb = et->et_object;
+ struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
+
+ xt->xt_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_xattr_block *xb = et->et_object;
+ struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
+
+ return le64_to_cpu(xt->xt_last_eb_blk);
+}
+
+static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_xattr_block *xb = et->et_object;
+
+ le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
+}
-struct ocfs2_path {
- int p_tree_depth;
- struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
+static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_xattr_tree_update_clusters,
+ .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el,
+ .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
-#define path_root_bh(_path) ((_path)->p_node[0].bh)
-#define path_root_el(_path) ((_path)->p_node[0].el)
-#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
-#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
-#define path_num_items(_path) ((_path)->p_tree_depth + 1)
+static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ return le64_to_cpu(dx_root->dr_last_eb_blk);
+}
+
+static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ le32_add_cpu(&dx_root->dr_clusters, clusters);
+}
+
+static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
+
+ return 0;
+}
+static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ et->et_root_el = &dx_root->dr_list;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_dx_root_update_clusters,
+ .eo_sanity_check = ocfs2_dx_root_sanity_check,
+ .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
+};
+
+static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_refcount_block *rb = et->et_object;
+
+ et->et_root_el = &rb->rf_list;
+}
+
+static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_refcount_block *rb = et->et_object;
+
+ rb->rf_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_refcount_block *rb = et->et_object;
+
+ return le64_to_cpu(rb->rf_last_eb_blk);
+}
+
+static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_refcount_block *rb = et->et_object;
+
+ le32_add_cpu(&rb->rf_clusters, clusters);
+}
+
+static enum ocfs2_contig_type
+ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *ext,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ return CONTIG_NONE;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_refcount_tree_update_clusters,
+ .eo_fill_root_el = ocfs2_refcount_tree_fill_root_el,
+ .eo_extent_contig = ocfs2_refcount_tree_extent_contig,
+};
+
+static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *bh,
+ ocfs2_journal_access_func access,
+ void *obj,
+ struct ocfs2_extent_tree_operations *ops)
+{
+ et->et_ops = ops;
+ et->et_root_bh = bh;
+ et->et_ci = ci;
+ et->et_root_journal_access = access;
+ if (!obj)
+ obj = (void *)bh->b_data;
+ et->et_object = obj;
+
+ et->et_ops->eo_fill_root_el(et);
+ if (!et->et_ops->eo_fill_max_leaf_clusters)
+ et->et_max_leaf_clusters = 0;
+ else
+ et->et_ops->eo_fill_max_leaf_clusters(et);
+}
+
+void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
+ NULL, &ocfs2_dinode_et_ops);
+}
+
+void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
+ NULL, &ocfs2_xattr_tree_et_ops);
+}
+
+void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct ocfs2_xattr_value_buf *vb)
+{
+ __ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
+ &ocfs2_xattr_value_et_ops);
+}
+
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
+ NULL, &ocfs2_dx_root_et_ops);
+}
+
+void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
+ NULL, &ocfs2_refcount_tree_et_ops);
+}
+
+static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 new_last_eb_blk)
+{
+ et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
+}
+
+static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ return et->et_ops->eo_get_last_eb_blk(et);
+}
+
+static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ et->et_ops->eo_update_clusters(et, clusters);
+}
+
+static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec)
+{
+ if (et->et_ops->eo_extent_map_insert)
+ et->et_ops->eo_extent_map_insert(et, rec);
+}
+
+static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ if (et->et_ops->eo_extent_map_truncate)
+ et->et_ops->eo_extent_map_truncate(et, clusters);
+}
+
+static inline int ocfs2_et_root_journal_access(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ int type)
+{
+ return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
+ type);
+}
+
+static inline enum ocfs2_contig_type
+ ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ if (et->et_ops->eo_extent_contig)
+ return et->et_ops->eo_extent_contig(et, rec, insert_rec);
+
+ return ocfs2_extent_rec_contig(
+ ocfs2_metadata_cache_get_super(et->et_ci),
+ rec, insert_rec);
+}
+
+static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *rec)
+{
+ int ret = 0;
+
+ if (et->et_ops->eo_insert_check)
+ ret = et->et_ops->eo_insert_check(et, rec);
+ return ret;
+}
+
+static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
+{
+ int ret = 0;
+
+ if (et->et_ops->eo_sanity_check)
+ ret = et->et_ops->eo_sanity_check(et);
+ return ret;
+}
+
+static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ struct ocfs2_extent_block *eb);
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
+ struct ocfs2_extent_rec *insert_rec);
/*
* Reset the actual path elements so that we can re-use the structure
* to build another path. Generally, this involves freeing the buffer
* heads.
*/
-static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
+void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
{
int i, start = 0, depth = 0;
struct ocfs2_path_item *node;
@@ -106,11 +600,13 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
*/
if (keep_root)
depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
+ else
+ path_root_access(path) = NULL;
path->p_tree_depth = depth;
}
-static void ocfs2_free_path(struct ocfs2_path *path)
+void ocfs2_free_path(struct ocfs2_path *path)
{
if (path) {
ocfs2_reinit_path(path, 0);
@@ -131,6 +627,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
BUG_ON(path_root_bh(dest) != path_root_bh(src));
BUG_ON(path_root_el(dest) != path_root_el(src));
+ BUG_ON(path_root_access(dest) != path_root_access(src));
ocfs2_reinit_path(dest, 1);
@@ -152,6 +649,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
int i;
BUG_ON(path_root_bh(dest) != path_root_bh(src));
+ BUG_ON(path_root_access(dest) != path_root_access(src));
for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
brelse(dest->p_node[i].bh);
@@ -187,7 +685,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
}
static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
- struct ocfs2_extent_list *root_el)
+ struct ocfs2_extent_list *root_el,
+ ocfs2_journal_access_func access)
{
struct ocfs2_path *path;
@@ -199,27 +698,54 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
get_bh(root_bh);
path_root_bh(path) = root_bh;
path_root_el(path) = root_el;
+ path_root_access(path) = access;
}
return path;
}
+struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
+{
+ return ocfs2_new_path(path_root_bh(path), path_root_el(path),
+ path_root_access(path));
+}
+
+struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
+{
+ return ocfs2_new_path(et->et_root_bh, et->et_root_el,
+ et->et_root_journal_access);
+}
+
/*
- * Allocate and initialize a new path based on a disk inode tree.
+ * Journal the buffer at depth idx. All idx>0 are extent_blocks,
+ * otherwise it's the root_access function.
+ *
+ * I don't like the way this function's name looks next to
+ * ocfs2_journal_access_path(), but I don't have a better one.
*/
-static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh)
+int ocfs2_path_bh_journal_access(handle_t *handle,
+ struct ocfs2_caching_info *ci,
+ struct ocfs2_path *path,
+ int idx)
{
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- struct ocfs2_extent_list *el = &di->id2.i_list;
+ ocfs2_journal_access_func access = path_root_access(path);
+
+ if (!access)
+ access = ocfs2_journal_access;
- return ocfs2_new_path(di_bh, el);
+ if (idx)
+ access = ocfs2_journal_access_eb;
+
+ return access(handle, ci, path->p_node[idx].bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
}
/*
* Convenience function to journal all components in a path.
*/
-static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
- struct ocfs2_path *path)
+int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+ handle_t *handle,
+ struct ocfs2_path *path)
{
int i, ret = 0;
@@ -227,8 +753,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
goto out;
for(i = 0; i < path_num_items(path); i++) {
- ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -269,17 +794,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
return ret;
}
-enum ocfs2_contig_type {
- CONTIG_NONE = 0,
- CONTIG_LEFT,
- CONTIG_RIGHT,
- CONTIG_LEFTRIGHT,
-};
-
-
/*
* NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
- * ocfs2_extent_contig only work properly against leaf nodes!
+ * ocfs2_extent_rec_contig only work properly against leaf nodes!
*/
static int ocfs2_block_extent_contig(struct super_block *sb,
struct ocfs2_extent_rec *ext,
@@ -305,9 +822,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
}
static enum ocfs2_contig_type
- ocfs2_extent_contig(struct inode *inode,
- struct ocfs2_extent_rec *ext,
- struct ocfs2_extent_rec *insert_rec)
+ ocfs2_extent_rec_contig(struct super_block *sb,
+ struct ocfs2_extent_rec *ext,
+ struct ocfs2_extent_rec *insert_rec)
{
u64 blkno = le64_to_cpu(insert_rec->e_blkno);
@@ -320,12 +837,12 @@ static enum ocfs2_contig_type
return CONTIG_NONE;
if (ocfs2_extents_adjacent(ext, insert_rec) &&
- ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
+ ocfs2_block_extent_contig(sb, ext, blkno))
return CONTIG_RIGHT;
blkno = le64_to_cpu(ext->e_blkno);
if (ocfs2_extents_adjacent(insert_rec, ext) &&
- ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno))
+ ocfs2_block_extent_contig(sb, insert_rec, blkno))
return CONTIG_LEFT;
return CONTIG_NONE;
@@ -363,46 +880,112 @@ struct ocfs2_merge_ctxt {
int c_split_covers_rec;
};
+static int ocfs2_validate_extent_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int rc;
+ struct ocfs2_extent_block *eb =
+ (struct ocfs2_extent_block *)bh->b_data;
+
+ trace_ocfs2_validate_extent_block((unsigned long long)bh->b_blocknr);
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ /*
+ * If the ecc fails, we return the error but otherwise
+ * leave the filesystem running. We know any error is
+ * local to this block.
+ */
+ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
+ if (rc) {
+ mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return rc;
+ }
+
+ /*
+ * Errors after here are fatal.
+ */
+
+ if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
+ ocfs2_error(sb,
+ "Extent block #%llu has bad signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ eb->h_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid h_blkno "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(eb->h_blkno));
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid "
+ "h_fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(eb->h_fs_generation));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
+ struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(ci, eb_blkno, &tmp,
+ ocfs2_validate_extent_block);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+
/*
* How many free extents have we got before we need more meta data?
*/
int ocfs2_num_free_extents(struct ocfs2_super *osb,
- struct inode *inode,
- struct ocfs2_dinode *fe)
+ struct ocfs2_extent_tree *et)
{
int retval;
- struct ocfs2_extent_list *el;
+ struct ocfs2_extent_list *el = NULL;
struct ocfs2_extent_block *eb;
struct buffer_head *eb_bh = NULL;
+ u64 last_eb_blk = 0;
- mlog_entry_void();
-
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- retval = -EIO;
- goto bail;
- }
+ el = et->et_root_el;
+ last_eb_blk = ocfs2_et_get_last_eb_blk(et);
- if (fe->i_last_eb_blk) {
- retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
- &eb_bh, OCFS2_BH_CACHED, inode);
+ if (last_eb_blk) {
+ retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
+ &eb_bh);
if (retval < 0) {
mlog_errno(retval);
goto bail;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
- } else
- el = &fe->id2.i_list;
+ }
BUG_ON(el->l_tree_depth != 0);
retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
bail:
- if (eb_bh)
- brelse(eb_bh);
+ brelse(eb_bh);
- mlog_exit(retval);
+ trace_ocfs2_num_free_extents(retval);
return retval;
}
@@ -411,9 +994,8 @@ bail:
* sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
* l_count for you
*/
-static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
- handle_t *handle,
- struct inode *inode,
+static int ocfs2_create_new_meta_bhs(handle_t *handle,
+ struct ocfs2_extent_tree *et,
int wanted,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head *bhs[])
@@ -421,17 +1003,17 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
int count, status, i;
u16 suballoc_bit_start;
u32 num_got;
- u64 first_blkno;
+ u64 suballoc_loc, first_blkno;
+ struct ocfs2_super *osb =
+ OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
struct ocfs2_extent_block *eb;
- mlog_entry_void();
-
count = 0;
while (count < wanted) {
- status = ocfs2_claim_metadata(osb,
- handle,
+ status = ocfs2_claim_metadata(handle,
meta_ac,
wanted - count,
+ &suballoc_loc,
&suballoc_bit_start,
&num_got,
&first_blkno);
@@ -443,14 +1025,15 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
for(i = count; i < (num_got + count); i++) {
bhs[i] = sb_getblk(osb->sb, first_blkno);
if (bhs[i] == NULL) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
- ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
+ ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
- status = ocfs2_journal_access(handle, inode, bhs[i],
- OCFS2_JOURNAL_ACCESS_CREATE);
+ status = ocfs2_journal_access_eb(handle, et->et_ci,
+ bhs[i],
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -462,7 +1045,9 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
eb->h_blkno = cpu_to_le64(first_blkno);
eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
- eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
+ eb->h_suballoc_slot =
+ cpu_to_le16(meta_ac->ac_alloc_slot);
+ eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
eb->h_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
@@ -472,11 +1057,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
/* We'll also be dirtied by the caller, so
* this isn't absolutely necessary. */
- status = ocfs2_journal_dirty(handle, bhs[i]);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_journal_dirty(handle, bhs[i]);
}
count += num_got;
@@ -486,12 +1067,11 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
bail:
if (status < 0) {
for(i = 0; i < wanted; i++) {
- if (bhs[i])
- brelse(bhs[i]);
+ brelse(bhs[i]);
bhs[i] = NULL;
}
+ mlog_errno(status);
}
- mlog_exit(status);
return status;
}
@@ -518,8 +1098,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
}
/*
+ * Change range of the branches in the right most path according to the leaf
+ * extent block's rightmost record.
+ */
+static int ocfs2_adjust_rightmost_branch(handle_t *handle,
+ struct ocfs2_extent_tree *et)
+{
+ int status;
+ struct ocfs2_path *path = NULL;
+ struct ocfs2_extent_list *el;
+ struct ocfs2_extent_rec *rec;
+
+ path = ocfs2_new_path_from_et(et);
+ if (!path) {
+ status = -ENOMEM;
+ return status;
+ }
+
+ status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_extend_trans(handle, path_num_items(path));
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access_path(et->et_ci, handle, path);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ el = path_leaf_el(path);
+ rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
+
+ ocfs2_adjust_rightmost_records(handle, et, path, rec);
+
+out:
+ ocfs2_free_path(path);
+ return status;
+}
+
+/*
* Add an entire tree branch to our inode. eb_bh is the extent block
- * to start at, if we don't want to start the branch at the dinode
+ * to start at, if we don't want to start the branch at the root
* structure.
*
* last_eb_bh is required as we have to update it's next_leaf pointer
@@ -528,10 +1154,8 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
* the new branch will be 'empty' in the sense that every block will
* contain a single record with cluster count == 0.
*/
-static int ocfs2_add_branch(struct ocfs2_super *osb,
- handle_t *handle,
- struct inode *inode,
- struct buffer_head *fe_bh,
+static int ocfs2_add_branch(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct buffer_head *eb_bh,
struct buffer_head **last_eb_bh,
struct ocfs2_alloc_context *meta_ac)
@@ -540,29 +1164,48 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
u64 next_blkno, new_last_eb_blk;
struct buffer_head *bh;
struct buffer_head **new_eb_bhs = NULL;
- struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *eb_el;
struct ocfs2_extent_list *el;
- u32 new_cpos;
-
- mlog_entry_void();
+ u32 new_cpos, root_end;
BUG_ON(!last_eb_bh || !*last_eb_bh);
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
if (eb_bh) {
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
} else
- el = &fe->id2.i_list;
+ el = et->et_root_el;
/* we never add a branch to a leaf. */
BUG_ON(!el->l_tree_depth);
new_blocks = le16_to_cpu(el->l_tree_depth);
+ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
+ new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
+ root_end = ocfs2_sum_rightmost_rec(et->et_root_el);
+
+ /*
+ * If there is a gap before the root end and the real end
+ * of the righmost leaf block, we need to remove the gap
+ * between new_cpos and root_end first so that the tree
+ * is consistent after we add a new branch(it will start
+ * from new_cpos).
+ */
+ if (root_end > new_cpos) {
+ trace_ocfs2_adjust_rightmost_branch(
+ (unsigned long long)
+ ocfs2_metadata_cache_owner(et->et_ci),
+ root_end, new_cpos);
+
+ status = ocfs2_adjust_rightmost_branch(handle, et);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
/* allocate the number of new eb blocks we need */
new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
@@ -572,16 +1215,13 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
goto bail;
}
- status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
+ status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
meta_ac, new_eb_bhs);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
- new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
-
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
* linked with the rest of the tree.
* conversly, new_eb_bhs[0] is the new bottommost leaf.
@@ -593,15 +1233,12 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
for(i = 0; i < new_blocks; i++) {
bh = new_eb_bhs[i];
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
- status = ocfs2_journal_access(handle, inode, bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
+ status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -625,12 +1262,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
if (!eb_el->l_tree_depth)
new_last_eb_blk = le64_to_cpu(eb->h_blkno);
- status = ocfs2_journal_dirty(handle, bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
+ ocfs2_journal_dirty(handle, bh);
next_blkno = le64_to_cpu(eb->h_blkno);
}
@@ -640,21 +1272,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
* journal_dirty erroring as it won't unless we've aborted the
* handle (in which case we would never be here) so reserving
* the write with journal_access is all we need to do. */
- status = ocfs2_journal_access(handle, inode, *last_eb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- status = ocfs2_journal_access(handle, inode, fe_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
if (eb_bh) {
- status = ocfs2_journal_access(handle, inode, eb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -662,7 +1294,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
}
/* Link the new branch into the rest of the tree (el will
- * either be on the fe, or the extent block passed in. */
+ * either be on the root_bh, or the extent block passed in. */
i = le16_to_cpu(el->l_next_free_rec);
el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
@@ -671,22 +1303,15 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
/* fe needs a new last extent block pointer, as does the
* next_leaf on the previously last-extent-block. */
- fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
+ ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
- status = ocfs2_journal_dirty(handle, *last_eb_bh);
- if (status < 0)
- mlog_errno(status);
- status = ocfs2_journal_dirty(handle, fe_bh);
- if (status < 0)
- mlog_errno(status);
- if (eb_bh) {
- status = ocfs2_journal_dirty(handle, eb_bh);
- if (status < 0)
- mlog_errno(status);
- }
+ ocfs2_journal_dirty(handle, *last_eb_bh);
+ ocfs2_journal_dirty(handle, et->et_root_bh);
+ if (eb_bh)
+ ocfs2_journal_dirty(handle, eb_bh);
/*
* Some callers want to track the rightmost leaf so pass it
@@ -700,12 +1325,10 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
bail:
if (new_eb_bhs) {
for (i = 0; i < new_blocks; i++)
- if (new_eb_bhs[i])
- brelse(new_eb_bhs[i]);
+ brelse(new_eb_bhs[i]);
kfree(new_eb_bhs);
}
- mlog_exit(status);
return status;
}
@@ -714,24 +1337,19 @@ bail:
* returns back the new extent block so you can add a branch to it
* after this call.
*/
-static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
- handle_t *handle,
- struct inode *inode,
- struct buffer_head *fe_bh,
+static int ocfs2_shift_tree_depth(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **ret_new_eb_bh)
{
int status, i;
u32 new_clusters;
struct buffer_head *new_eb_bh = NULL;
- struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
- struct ocfs2_extent_list *fe_el;
+ struct ocfs2_extent_list *root_el;
struct ocfs2_extent_list *eb_el;
- mlog_entry_void();
-
- status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
+ status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
&new_eb_bh);
if (status < 0) {
mlog_errno(status);
@@ -739,37 +1357,29 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
}
eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
- fe_el = &fe->id2.i_list;
+ root_el = et->et_root_el;
- status = ocfs2_journal_access(handle, inode, new_eb_bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
+ status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- /* copy the fe data into the new extent block */
- eb_el->l_tree_depth = fe_el->l_tree_depth;
- eb_el->l_next_free_rec = fe_el->l_next_free_rec;
- for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
- eb_el->l_recs[i] = fe_el->l_recs[i];
+ /* copy the root extent list data into the new extent block */
+ eb_el->l_tree_depth = root_el->l_tree_depth;
+ eb_el->l_next_free_rec = root_el->l_next_free_rec;
+ for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
+ eb_el->l_recs[i] = root_el->l_recs[i];
- status = ocfs2_journal_dirty(handle, new_eb_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_journal_dirty(handle, new_eb_bh);
- status = ocfs2_journal_access(handle, inode, fe_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -777,34 +1387,28 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
new_clusters = ocfs2_sum_rightmost_rec(eb_el);
- /* update fe now */
- le16_add_cpu(&fe_el->l_tree_depth, 1);
- fe_el->l_recs[0].e_cpos = 0;
- fe_el->l_recs[0].e_blkno = eb->h_blkno;
- fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
- for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
- memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
- fe_el->l_next_free_rec = cpu_to_le16(1);
+ /* update root_bh now */
+ le16_add_cpu(&root_el->l_tree_depth, 1);
+ root_el->l_recs[0].e_cpos = 0;
+ root_el->l_recs[0].e_blkno = eb->h_blkno;
+ root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
+ for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
+ memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
+ root_el->l_next_free_rec = cpu_to_le16(1);
/* If this is our 1st tree depth shift, then last_eb_blk
* becomes the allocated extent block */
- if (fe_el->l_tree_depth == cpu_to_le16(1))
- fe->i_last_eb_blk = eb->h_blkno;
+ if (root_el->l_tree_depth == cpu_to_le16(1))
+ ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
- status = ocfs2_journal_dirty(handle, fe_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_journal_dirty(handle, et->et_root_bh);
*ret_new_eb_bh = new_eb_bh;
new_eb_bh = NULL;
status = 0;
bail:
- if (new_eb_bh)
- brelse(new_eb_bh);
+ brelse(new_eb_bh);
- mlog_exit(status);
return status;
}
@@ -817,77 +1421,64 @@ bail:
* 1) a lowest extent block is found, then we pass it back in
* *lowest_eb_bh and return '0'
*
- * 2) the search fails to find anything, but the dinode has room. We
+ * 2) the search fails to find anything, but the root_el has room. We
* pass NULL back in *lowest_eb_bh, but still return '0'
*
- * 3) the search fails to find anything AND the dinode is full, in
+ * 3) the search fails to find anything AND the root_el is full, in
* which case we return > 0
*
* return status < 0 indicates an error.
*/
-static int ocfs2_find_branch_target(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
+static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
struct buffer_head **target_bh)
{
int status = 0, i;
u64 blkno;
- struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct buffer_head *bh = NULL;
struct buffer_head *lowest_bh = NULL;
- mlog_entry_void();
-
*target_bh = NULL;
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
- el = &fe->id2.i_list;
+ el = et->et_root_el;
while(le16_to_cpu(el->l_tree_depth) > 1) {
if (le16_to_cpu(el->l_next_free_rec) == 0) {
- ocfs2_error(inode->i_sb, "Dinode %llu has empty "
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has empty "
"extent list (next_free_rec == 0)",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
status = -EIO;
goto bail;
}
i = le16_to_cpu(el->l_next_free_rec) - 1;
blkno = le64_to_cpu(el->l_recs[i].e_blkno);
if (!blkno) {
- ocfs2_error(inode->i_sb, "Dinode %llu has extent "
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has extent "
"list where extent # %d has no physical "
"block start",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
status = -EIO;
goto bail;
}
- if (bh) {
- brelse(bh);
- bh = NULL;
- }
+ brelse(bh);
+ bh = NULL;
- status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
- inode);
+ status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
el = &eb->h_list;
if (le16_to_cpu(el->l_next_free_rec) <
le16_to_cpu(el->l_count)) {
- if (lowest_bh)
- brelse(lowest_bh);
+ brelse(lowest_bh);
lowest_bh = bh;
get_bh(lowest_bh);
}
@@ -895,16 +1486,14 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
/* If we didn't find one and the fe doesn't have any room,
* then return '1' */
- if (!lowest_bh
- && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
+ el = et->et_root_el;
+ if (!lowest_bh && (el->l_next_free_rec == el->l_count))
status = 1;
*target_bh = lowest_bh;
bail:
- if (bh)
- brelse(bh);
+ brelse(bh);
- mlog_exit(status);
return status;
}
@@ -918,20 +1507,18 @@ bail:
*
* *last_eb_bh will be updated by ocfs2_add_branch().
*/
-static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
- struct buffer_head *di_bh, int *final_depth,
- struct buffer_head **last_eb_bh,
+static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+ int *final_depth, struct buffer_head **last_eb_bh,
struct ocfs2_alloc_context *meta_ac)
{
int ret, shift;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_extent_list *el = et->et_root_el;
+ int depth = le16_to_cpu(el->l_tree_depth);
struct buffer_head *bh = NULL;
BUG_ON(meta_ac == NULL);
- shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
+ shift = ocfs2_find_branch_target(et, &bh);
if (shift < 0) {
ret = shift;
mlog_errno(ret);
@@ -943,13 +1530,15 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
* another tree level */
if (shift) {
BUG_ON(bh);
- mlog(0, "need to shift tree depth (current = %d)\n", depth);
+ trace_ocfs2_grow_tree(
+ (unsigned long long)
+ ocfs2_metadata_cache_owner(et->et_ci),
+ depth);
/* ocfs2_shift_tree_depth will return us a buffer with
* the new extent block (so we can pass that to
* ocfs2_add_branch). */
- ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
- meta_ac, &bh);
+ ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -974,8 +1563,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
/* call ocfs2_add_branch to add the final part of the tree with
* the new data. */
- mlog(0, "add branch. bh = %p\n", bh);
- ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
+ ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
meta_ac);
if (ret < 0) {
mlog_errno(ret);
@@ -990,15 +1578,6 @@ out:
}
/*
- * This is only valid for leaf nodes, which are the only ones that can
- * have empty extents anyway.
- */
-static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
-{
- return !rec->e_leaf_clusters;
-}
-
-/*
* This function will discard the rightmost extent record.
*/
static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
@@ -1029,8 +1608,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
BUG_ON(!next_free);
/* The tree code before us didn't allow enough room in the leaf. */
- if (el->l_next_free_rec == el->l_count && !has_empty)
- BUG();
+ BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
/*
* The easiest way to approach this is to just remove the
@@ -1059,8 +1637,9 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
}
insert_index = i;
- mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n",
- insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count));
+ trace_ocfs2_rotate_leaf(insert_cpos, insert_index,
+ has_empty, next_free,
+ le16_to_cpu(el->l_count));
BUG_ON(insert_index < 0);
BUG_ON(insert_index >= le16_to_cpu(el->l_count));
@@ -1155,9 +1734,9 @@ set_and_inc:
*
* The array index of the subtree root is passed back.
*/
-static int ocfs2_find_subtree_root(struct inode *inode,
- struct ocfs2_path *left,
- struct ocfs2_path *right)
+int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *left,
+ struct ocfs2_path *right)
{
int i = 0;
@@ -1173,10 +1752,10 @@ static int ocfs2_find_subtree_root(struct inode *inode,
* The caller didn't pass two adjacent paths.
*/
mlog_bug_on_msg(i > left->p_tree_depth,
- "Inode %lu, left depth %u, right depth %u\n"
+ "Owner %llu, left depth %u, right depth %u\n"
"left leaf blk %llu, right leaf blk %llu\n",
- inode->i_ino, left->p_tree_depth,
- right->p_tree_depth,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ left->p_tree_depth, right->p_tree_depth,
(unsigned long long)path_leaf_bh(left)->b_blocknr,
(unsigned long long)path_leaf_bh(right)->b_blocknr);
} while (left->p_node[i].bh->b_blocknr ==
@@ -1193,7 +1772,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *);
* This code can be called with a cpos larger than the tree, in which
* case it will return the rightmost path.
*/
-static int __ocfs2_find_path(struct inode *inode,
+static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
struct ocfs2_extent_list *root_el, u32 cpos,
path_insert_t *func, void *data)
{
@@ -1204,15 +1783,14 @@ static int __ocfs2_find_path(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct ocfs2_extent_rec *rec;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
el = root_el;
while (el->l_tree_depth) {
if (le16_to_cpu(el->l_next_free_rec) == 0) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has empty extent list at "
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has empty extent list at "
"depth %u\n",
- (unsigned long long)oi->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
le16_to_cpu(el->l_tree_depth));
ret = -EROFS;
goto out;
@@ -1235,10 +1813,10 @@ static int __ocfs2_find_path(struct inode *inode,
blkno = le64_to_cpu(el->l_recs[i].e_blkno);
if (blkno == 0) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has bad blkno in extent list "
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has bad blkno in extent list "
"at depth %u (index %d)\n",
- (unsigned long long)oi->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
le16_to_cpu(el->l_tree_depth), i);
ret = -EROFS;
goto out;
@@ -1246,8 +1824,7 @@ static int __ocfs2_find_path(struct inode *inode,
brelse(bh);
bh = NULL;
- ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
- &bh, OCFS2_BH_CACHED, inode);
+ ret = ocfs2_read_extent_block(ci, blkno, &bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1255,18 +1832,13 @@ static int __ocfs2_find_path(struct inode *inode,
eb = (struct ocfs2_extent_block *) bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EIO;
- goto out;
- }
if (le16_to_cpu(el->l_next_free_rec) >
le16_to_cpu(el->l_count)) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has bad count in extent list "
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has bad count in extent list "
"at block %llu (next free=%u, count=%u)\n",
- (unsigned long long)oi->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
(unsigned long long)bh->b_blocknr,
le16_to_cpu(el->l_next_free_rec),
le16_to_cpu(el->l_count));
@@ -1310,14 +1882,14 @@ static void find_path_ins(void *data, struct buffer_head *bh)
ocfs2_path_insert_eb(fp->path, fp->index, bh);
fp->index++;
}
-static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
- u32 cpos)
+int ocfs2_find_path(struct ocfs2_caching_info *ci,
+ struct ocfs2_path *path, u32 cpos)
{
struct find_path_data data;
data.index = 1;
data.path = path;
- return __ocfs2_find_path(inode, path_root_el(path), cpos,
+ return __ocfs2_find_path(ci, path_root_el(path), cpos,
find_path_ins, &data);
}
@@ -1342,13 +1914,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh)
*
* This function doesn't handle non btree extent lists.
*/
-int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
- u32 cpos, struct buffer_head **leaf_bh)
+int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
+ struct ocfs2_extent_list *root_el, u32 cpos,
+ struct buffer_head **leaf_bh)
{
int ret;
struct buffer_head *bh = NULL;
- ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh);
+ ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1388,7 +1961,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
* immediately to their right.
*/
left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
- if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
+ if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
+ BUG_ON(right_child_el->l_tree_depth);
BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
}
@@ -1450,13 +2024,15 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
* - When our insert into the right path leaf is at the leftmost edge
* and requires an update of the path immediately to it's left. This
* can occur at the end of some types of rotation and appending inserts.
+ * - When we've adjusted the last extent record in the left path leaf and the
+ * 1st extent record in the right path leaf during cross extent block merge.
*/
-static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
+static void ocfs2_complete_edge_insert(handle_t *handle,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
int subtree_index)
{
- int ret, i, idx;
+ int i, idx;
struct ocfs2_extent_list *el, *left_el, *right_el;
struct ocfs2_extent_rec *left_rec, *right_rec;
struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
@@ -1476,7 +2052,7 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
left_el = path_leaf_el(left_path);
right_el = path_leaf_el(right_path);
for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
- mlog(0, "Adjust records at index %u\n", i);
+ trace_ocfs2_complete_edge_insert(i);
/*
* One nice property of knowing that all of these
@@ -1494,13 +2070,8 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
right_el);
- ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
- if (ret)
- mlog_errno(ret);
-
- ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
+ ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
/*
* Setup our list pointers now so that the current
@@ -1524,13 +2095,11 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
root_bh = left_path->p_node[subtree_index].bh;
- ret = ocfs2_journal_dirty(handle, root_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, root_bh);
}
-static int ocfs2_rotate_subtree_right(struct inode *inode,
- handle_t *handle,
+static int ocfs2_rotate_subtree_right(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
int subtree_index)
@@ -1546,10 +2115,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
left_el = path_leaf_el(left_path);
if (left_el->l_next_free_rec != left_el->l_count) {
- ocfs2_error(inode->i_sb,
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Inode %llu has non-full interior leaf node %llu"
"(next free = %u)",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
(unsigned long long)left_leaf_bh->b_blocknr,
le16_to_cpu(left_el->l_next_free_rec));
return -EROFS;
@@ -1565,25 +2134,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
root_bh = left_path->p_node[subtree_index].bh;
BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
- ret = ocfs2_journal_access(handle, inode, root_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
+ subtree_index);
if (ret) {
mlog_errno(ret);
goto out;
}
for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
- ret = ocfs2_journal_access(handle, inode,
- right_path->p_node[i].bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ right_path, i);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access(handle, inode,
- left_path->p_node[i].bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ left_path, i);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1596,16 +2163,12 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
/* This is a code error, not a disk corruption. */
mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
"because rightmost leaf block %llu is empty\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
(unsigned long long)right_leaf_bh->b_blocknr);
ocfs2_create_empty_extent(right_el);
- ret = ocfs2_journal_dirty(handle, right_leaf_bh);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ocfs2_journal_dirty(handle, right_leaf_bh);
/* Do the copy now. */
i = le16_to_cpu(left_el->l_next_free_rec) - 1;
@@ -1624,14 +2187,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
le16_add_cpu(&left_el->l_next_free_rec, 1);
- ret = ocfs2_journal_dirty(handle, left_leaf_bh);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ocfs2_journal_dirty(handle, left_leaf_bh);
- ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
- subtree_index);
+ ocfs2_complete_edge_insert(handle, left_path, right_path,
+ subtree_index);
out:
return ret;
@@ -1643,8 +2202,8 @@ out:
*
* Will return zero if the path passed in is already the leftmost path.
*/
-static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
- struct ocfs2_path *path, u32 *cpos)
+int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
+ struct ocfs2_path *path, u32 *cpos)
{
int i, j, ret = 0;
u64 blkno;
@@ -1721,12 +2280,14 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
int op_credits,
struct ocfs2_path *path)
{
+ int ret = 0;
int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
if (handle->h_buffer_credits < credits)
- return ocfs2_extend_trans(handle, credits);
+ ret = ocfs2_extend_trans(handle,
+ credits - handle->h_buffer_credits);
- return 0;
+ return ret;
}
/*
@@ -1785,7 +2346,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
*
* The array is assumed to be large enough to hold an entire path (tree depth).
*
- * Upon succesful return from this function:
+ * Upon successful return from this function:
*
* - The 'right_path' array will contain a path to the leaf block
* whose range contains e_cpos.
@@ -1794,8 +2355,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
* *ret_left_path will contain a valid path which can be passed to
* ocfs2_insert_path().
*/
-static int ocfs2_rotate_tree_right(struct inode *inode,
- handle_t *handle,
+static int ocfs2_rotate_tree_right(handle_t *handle,
+ struct ocfs2_extent_tree *et,
enum ocfs2_split_type split,
u32 insert_cpos,
struct ocfs2_path *right_path,
@@ -1804,24 +2365,26 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
int ret, start, orig_credits = handle->h_buffer_credits;
u32 cpos;
struct ocfs2_path *left_path = NULL;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
*ret_left_path = NULL;
- left_path = ocfs2_new_path(path_root_bh(right_path),
- path_root_el(right_path));
+ left_path = ocfs2_new_path_from_path(right_path);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos);
+ ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
- mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos);
+ trace_ocfs2_rotate_tree_right(
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ insert_cpos, cpos);
/*
* What we want to do here is:
@@ -1850,10 +2413,12 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
* rotating subtrees.
*/
while (cpos && insert_cpos <= cpos) {
- mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
- insert_cpos, cpos);
+ trace_ocfs2_rotate_tree_right(
+ (unsigned long long)
+ ocfs2_metadata_cache_owner(et->et_ci),
+ insert_cpos, cpos);
- ret = ocfs2_find_path(inode, left_path, cpos);
+ ret = ocfs2_find_path(et->et_ci, left_path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1861,10 +2426,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
mlog_bug_on_msg(path_leaf_bh(left_path) ==
path_leaf_bh(right_path),
- "Inode %lu: error during insert of %u "
+ "Owner %llu: error during insert of %u "
"(left path cpos %u) results in two identical "
"paths ending at %llu\n",
- inode->i_ino, insert_cpos, cpos,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ insert_cpos, cpos,
(unsigned long long)
path_leaf_bh(left_path)->b_blocknr);
@@ -1890,12 +2456,12 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
goto out_ret_path;
}
- start = ocfs2_find_subtree_root(inode, left_path, right_path);
+ start = ocfs2_find_subtree_root(et, left_path, right_path);
- mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
- start,
- (unsigned long long) right_path->p_node[start].bh->b_blocknr,
- right_path->p_tree_depth);
+ trace_ocfs2_rotate_subtree(start,
+ (unsigned long long)
+ right_path->p_node[start].bh->b_blocknr,
+ right_path->p_tree_depth);
ret = ocfs2_extend_rotate_transaction(handle, start,
orig_credits, right_path);
@@ -1904,7 +2470,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
goto out;
}
- ret = ocfs2_rotate_subtree_right(inode, handle, left_path,
+ ret = ocfs2_rotate_subtree_right(handle, et, left_path,
right_path, start);
if (ret) {
mlog_errno(ret);
@@ -1936,8 +2502,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
*/
ocfs2_mv_path(right_path, left_path);
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
- &cpos);
+ ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1951,15 +2516,37 @@ out_ret_path:
return ret;
}
-static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
- struct ocfs2_path *path)
+static int ocfs2_update_edge_lengths(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ int subtree_index, struct ocfs2_path *path)
{
- int i, idx;
+ int i, idx, ret;
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_list *el;
struct ocfs2_extent_block *eb;
u32 range;
+ /*
+ * In normal tree rotation process, we will never touch the
+ * tree branch above subtree_index and ocfs2_extend_rotate_transaction
+ * doesn't reserve the credits for them either.
+ *
+ * But we do have a special case here which will update the rightmost
+ * records for all the bh in the path.
+ * So we have to allocate extra credits and access them.
+ */
+ ret = ocfs2_extend_trans(handle, subtree_index);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access_path(et->et_ci, handle, path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
/* Path should always be rightmost. */
eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -1980,9 +2567,12 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
ocfs2_journal_dirty(handle, path->p_node[i].bh);
}
+out:
+ return ret;
}
-static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_path(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_path *path, int unlink_start)
{
@@ -2004,12 +2594,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
mlog(ML_ERROR,
"Inode %llu, attempted to remove extent block "
"%llu with %u records\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
(unsigned long long)le64_to_cpu(eb->h_blkno),
le16_to_cpu(el->l_next_free_rec));
ocfs2_journal_dirty(handle, bh);
- ocfs2_remove_from_cache(inode, bh);
+ ocfs2_remove_from_cache(et->et_ci, bh);
continue;
}
@@ -2022,11 +2612,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
if (ret)
mlog_errno(ret);
- ocfs2_remove_from_cache(inode, bh);
+ ocfs2_remove_from_cache(et->et_ci, bh);
}
}
-static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_subtree(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
int subtree_index,
@@ -2057,11 +2648,12 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
ocfs2_journal_dirty(handle, root_bh);
ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
- ocfs2_unlink_path(inode, handle, dealloc, right_path,
+ ocfs2_unlink_path(handle, et, dealloc, right_path,
subtree_index + 1);
}
-static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_subtree_left(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
int subtree_index,
@@ -2069,8 +2661,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
int *deleted)
{
int ret, i, del_right_subtree = 0, right_has_empty = 0;
- struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
struct ocfs2_extent_block *eb;
@@ -2103,9 +2694,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
return -EAGAIN;
if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
- ret = ocfs2_journal_access(handle, inode,
- path_leaf_bh(right_path),
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_journal_access_eb(handle, et->et_ci,
+ path_leaf_bh(right_path),
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2122,8 +2713,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
* We have to update i_last_eb_blk during the meta
* data delete.
*/
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2138,25 +2729,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
*/
BUG_ON(right_has_empty && !del_right_subtree);
- ret = ocfs2_journal_access(handle, inode, root_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
+ subtree_index);
if (ret) {
mlog_errno(ret);
goto out;
}
for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
- ret = ocfs2_journal_access(handle, inode,
- right_path->p_node[i].bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ right_path, i);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access(handle, inode,
- left_path->p_node[i].bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ left_path, i);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2184,20 +2773,21 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
ocfs2_remove_empty_extent(right_leaf_el);
}
- ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
- if (ret)
- mlog_errno(ret);
- ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
+ ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
if (del_right_subtree) {
- ocfs2_unlink_subtree(inode, handle, left_path, right_path,
+ ocfs2_unlink_subtree(handle, et, left_path, right_path,
subtree_index, dealloc);
- ocfs2_update_edge_lengths(inode, handle, left_path);
+ ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
- di->i_last_eb_blk = eb->h_blkno;
+ ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
/*
* Removal of the extent in the left leaf was skipped
@@ -2207,13 +2797,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
if (right_has_empty)
ocfs2_remove_empty_extent(left_leaf_el);
- ret = ocfs2_journal_dirty(handle, di_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, et_root_bh);
*deleted = 1;
} else
- ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
+ ocfs2_complete_edge_insert(handle, left_path, right_path,
subtree_index);
out:
@@ -2229,8 +2817,8 @@ out:
* This looks similar, but is subtly different to
* ocfs2_find_cpos_for_left_leaf().
*/
-static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
- struct ocfs2_path *path, u32 *cpos)
+int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
+ struct ocfs2_path *path, u32 *cpos)
{
int i, j, ret = 0;
u64 blkno;
@@ -2299,35 +2887,34 @@ out:
return ret;
}
-static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
- handle_t *handle,
- struct buffer_head *bh,
- struct ocfs2_extent_list *el)
+static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path)
{
int ret;
+ struct buffer_head *bh = path_leaf_bh(path);
+ struct ocfs2_extent_list *el = path_leaf_el(path);
if (!ocfs2_is_empty_extent(&el->l_recs[0]))
return 0;
- ret = ocfs2_journal_access(handle, inode, bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
+ path_num_items(path) - 1);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_remove_empty_extent(el);
-
- ret = ocfs2_journal_dirty(handle, bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, bh);
out:
return ret;
}
-static int __ocfs2_rotate_tree_left(struct inode *inode,
- handle_t *handle, int orig_credits,
+static int __ocfs2_rotate_tree_left(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ int orig_credits,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_path **empty_extent_path)
@@ -2336,20 +2923,19 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
u32 right_cpos;
struct ocfs2_path *left_path = NULL;
struct ocfs2_path *right_path = NULL;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
*empty_extent_path = NULL;
- ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
- &right_cpos);
+ ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
- left_path = ocfs2_new_path(path_root_bh(path),
- path_root_el(path));
+ left_path = ocfs2_new_path_from_path(path);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -2358,8 +2944,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
ocfs2_cp_path(left_path, path);
- right_path = ocfs2_new_path(path_root_bh(path),
- path_root_el(path));
+ right_path = ocfs2_new_path_from_path(path);
if (!right_path) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -2367,17 +2952,16 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
}
while (right_cpos) {
- ret = ocfs2_find_path(inode, right_path, right_cpos);
+ ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
- subtree_root = ocfs2_find_subtree_root(inode, left_path,
+ subtree_root = ocfs2_find_subtree_root(et, left_path,
right_path);
- mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
- subtree_root,
+ trace_ocfs2_rotate_subtree(subtree_root,
(unsigned long long)
right_path->p_node[subtree_root].bh->b_blocknr,
right_path->p_tree_depth);
@@ -2393,15 +2977,14 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
* Caller might still want to make changes to the
* tree root, so re-add it to the journal here.
*/
- ret = ocfs2_journal_access(handle, inode,
- path_root_bh(left_path),
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ left_path, 0);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
+ ret = ocfs2_rotate_subtree_left(handle, et, left_path,
right_path, subtree_root,
dealloc, &deleted);
if (ret == -EAGAIN) {
@@ -2430,7 +3013,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
ocfs2_mv_path(left_path, right_path);
- ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+ ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
&right_cpos);
if (ret) {
mlog_errno(ret);
@@ -2445,30 +3028,21 @@ out:
return ret;
}
-static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
- struct ocfs2_path *path,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int ocfs2_remove_rightmost_path(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
{
int ret, subtree_index;
u32 cpos;
struct ocfs2_path *left_path = NULL;
- struct ocfs2_dinode *di;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
- /*
- * XXX: This code assumes that the root is an inode, which is
- * true for now but may change as tree code gets generic.
- */
- di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- ret = -EIO;
- ocfs2_error(inode->i_sb,
- "Inode %llu has invalid path root",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- goto out;
- }
+ ret = ocfs2_et_sanity_check(et);
+ if (ret)
+ goto out;
/*
* There's two ways we handle this depending on
* whether path is the only existing one.
@@ -2481,13 +3055,14 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
goto out;
}
- ret = ocfs2_journal_access_path(inode, handle, path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, path);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
+ ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+ path, &cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2498,50 +3073,54 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
* We have a path to the left of this one - it needs
* an update too.
*/
- left_path = ocfs2_new_path(path_root_bh(path),
- path_root_el(path));
+ left_path = ocfs2_new_path_from_path(path);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_path(inode, left_path, cpos);
+ ret = ocfs2_find_path(et->et_ci, left_path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access_path(inode, handle, left_path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
if (ret) {
mlog_errno(ret);
goto out;
}
- subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
+ subtree_index = ocfs2_find_subtree_root(et, left_path, path);
- ocfs2_unlink_subtree(inode, handle, left_path, path,
+ ocfs2_unlink_subtree(handle, et, left_path, path,
subtree_index, dealloc);
- ocfs2_update_edge_lengths(inode, handle, left_path);
+ ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
- di->i_last_eb_blk = eb->h_blkno;
+ ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
} else {
/*
* 'path' is also the leftmost path which
* means it must be the only one. This gets
* handled differently because we want to
- * revert the inode back to having extents
+ * revert the root back to having extents
* in-line.
*/
- ocfs2_unlink_path(inode, handle, dealloc, path, 1);
+ ocfs2_unlink_path(handle, et, dealloc, path, 1);
- el = &di->id2.i_list;
+ el = et->et_root_el;
el->l_tree_depth = 0;
el->l_next_free_rec = 0;
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
- di->i_last_eb_blk = 0;
+ ocfs2_et_set_last_eb_blk(et, 0);
}
ocfs2_journal_dirty(handle, path_root_bh(path));
@@ -2567,7 +3146,8 @@ out:
* the rightmost tree leaf record is removed so the caller is
* responsible for detecting and correcting that.
*/
-static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_tree_left(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
@@ -2583,12 +3163,10 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
if (path->p_tree_depth == 0) {
rightmost_no_delete:
/*
- * In-inode extents. This is trivially handled, so do
+ * Inline extents. This is trivially handled, so do
* it up front.
*/
- ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
- path_leaf_bh(path),
- path_leaf_el(path));
+ ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
if (ret)
mlog_errno(ret);
goto out;
@@ -2604,7 +3182,7 @@ rightmost_no_delete:
*
* 1) is handled via ocfs2_rotate_rightmost_leaf_left()
* 2a) we need the left branch so that we can update it with the unlink
- * 2b) we need to bring the inode back to inline extents.
+ * 2b) we need to bring the root back to inline extents.
*/
eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
@@ -2620,9 +3198,9 @@ rightmost_no_delete:
if (le16_to_cpu(el->l_next_free_rec) == 0) {
ret = -EIO;
- ocfs2_error(inode->i_sb,
- "Inode %llu has empty extent block at %llu",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has empty extent block at %llu",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
(unsigned long long)le64_to_cpu(eb->h_blkno));
goto out;
}
@@ -2636,7 +3214,7 @@ rightmost_no_delete:
* nonempty list.
*/
- ret = ocfs2_remove_rightmost_path(inode, handle, path,
+ ret = ocfs2_remove_rightmost_path(handle, et, path,
dealloc);
if (ret)
mlog_errno(ret);
@@ -2648,7 +3226,7 @@ rightmost_no_delete:
* and restarting from there.
*/
try_rotate:
- ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
+ ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
dealloc, &restart_path);
if (ret && ret != -EAGAIN) {
mlog_errno(ret);
@@ -2659,7 +3237,7 @@ try_rotate:
tmp_path = restart_path;
restart_path = NULL;
- ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
+ ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
tmp_path, dealloc,
&restart_path);
if (ret && ret != -EAGAIN) {
@@ -2712,27 +3290,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
}
}
+static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path **ret_right_path)
+{
+ int ret;
+ u32 right_cpos;
+ struct ocfs2_path *right_path = NULL;
+ struct ocfs2_extent_list *left_el;
+
+ *ret_right_path = NULL;
+
+ /* This function shouldn't be called for non-trees. */
+ BUG_ON(left_path->p_tree_depth == 0);
+
+ left_el = path_leaf_el(left_path);
+ BUG_ON(left_el->l_next_free_rec != left_el->l_count);
+
+ ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+ left_path, &right_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* This function shouldn't be called for the rightmost leaf. */
+ BUG_ON(right_cpos == 0);
+
+ right_path = ocfs2_new_path_from_path(left_path);
+ if (!right_path) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *ret_right_path = right_path;
+out:
+ if (ret)
+ ocfs2_free_path(right_path);
+ return ret;
+}
+
/*
* Remove split_rec clusters from the record at index and merge them
- * onto the beginning of the record at index + 1.
+ * onto the beginning of the record "next" to it.
+ * For index < l_count - 1, the next means the extent rec at index + 1.
+ * For index == l_count - 1, the "next" means the 1st extent rec of the
+ * next extent block.
*/
-static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
- handle_t *handle,
- struct ocfs2_extent_rec *split_rec,
- struct ocfs2_extent_list *el, int index)
+static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
+ handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *split_rec,
+ int index)
{
- int ret;
+ int ret, next_free, i;
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
struct ocfs2_extent_rec *left_rec;
struct ocfs2_extent_rec *right_rec;
+ struct ocfs2_extent_list *right_el;
+ struct ocfs2_path *right_path = NULL;
+ int subtree_index = 0;
+ struct ocfs2_extent_list *el = path_leaf_el(left_path);
+ struct buffer_head *bh = path_leaf_bh(left_path);
+ struct buffer_head *root_bh = NULL;
BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
-
left_rec = &el->l_recs[index];
- right_rec = &el->l_recs[index + 1];
- ret = ocfs2_journal_access(handle, inode, bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
+ le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
+ /* we meet with a cross extent block merge. */
+ ret = ocfs2_get_right_path(et, left_path, &right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ right_el = path_leaf_el(right_path);
+ next_free = le16_to_cpu(right_el->l_next_free_rec);
+ BUG_ON(next_free <= 0);
+ right_rec = &right_el->l_recs[0];
+ if (ocfs2_is_empty_extent(right_rec)) {
+ BUG_ON(next_free <= 1);
+ right_rec = &right_el->l_recs[1];
+ }
+
+ BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+ le16_to_cpu(left_rec->e_leaf_clusters) !=
+ le32_to_cpu(right_rec->e_cpos));
+
+ subtree_index = ocfs2_find_subtree_root(et, left_path,
+ right_path);
+
+ ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+ handle->h_buffer_credits,
+ right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ root_bh = left_path->p_node[subtree_index].bh;
+ BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
+ subtree_index);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ for (i = subtree_index + 1;
+ i < path_num_items(right_path); i++) {
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ right_path, i);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ left_path, i);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ } else {
+ BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
+ right_rec = &el->l_recs[index + 1];
+ }
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
+ path_num_items(left_path) - 1);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2742,42 +3440,160 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
le32_add_cpu(&right_rec->e_cpos, -split_clusters);
le64_add_cpu(&right_rec->e_blkno,
- -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+ -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+ split_clusters));
le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
ocfs2_cleanup_merge(el, index);
- ret = ocfs2_journal_dirty(handle, bh);
- if (ret)
+ ocfs2_journal_dirty(handle, bh);
+ if (right_path) {
+ ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
+ ocfs2_complete_edge_insert(handle, left_path, right_path,
+ subtree_index);
+ }
+out:
+ if (right_path)
+ ocfs2_free_path(right_path);
+ return ret;
+}
+
+static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *right_path,
+ struct ocfs2_path **ret_left_path)
+{
+ int ret;
+ u32 left_cpos;
+ struct ocfs2_path *left_path = NULL;
+
+ *ret_left_path = NULL;
+
+ /* This function shouldn't be called for non-trees. */
+ BUG_ON(right_path->p_tree_depth == 0);
+
+ ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+ right_path, &left_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* This function shouldn't be called for the leftmost leaf. */
+ BUG_ON(left_cpos == 0);
+
+ left_path = ocfs2_new_path_from_path(right_path);
+ if (!left_path) {
+ ret = -ENOMEM;
mlog_errno(ret);
+ goto out;
+ }
+ ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *ret_left_path = left_path;
out:
+ if (ret)
+ ocfs2_free_path(left_path);
return ret;
}
/*
* Remove split_rec clusters from the record at index and merge them
- * onto the tail of the record at index - 1.
+ * onto the tail of the record "before" it.
+ * For index > 0, the "before" means the extent rec at index - 1.
+ *
+ * For index == 0, the "before" means the last record of the previous
+ * extent block. And there is also a situation that we may need to
+ * remove the rightmost leaf extent block in the right_path and change
+ * the right path to indicate the new rightmost path.
*/
-static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
+static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *split_rec,
- struct ocfs2_extent_list *el, int index)
+ struct ocfs2_cached_dealloc_ctxt *dealloc,
+ int index)
{
- int ret, has_empty_extent = 0;
+ int ret, i, subtree_index = 0, has_empty_extent = 0;
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
struct ocfs2_extent_rec *left_rec;
struct ocfs2_extent_rec *right_rec;
+ struct ocfs2_extent_list *el = path_leaf_el(right_path);
+ struct buffer_head *bh = path_leaf_bh(right_path);
+ struct buffer_head *root_bh = NULL;
+ struct ocfs2_path *left_path = NULL;
+ struct ocfs2_extent_list *left_el;
- BUG_ON(index <= 0);
+ BUG_ON(index < 0);
- left_rec = &el->l_recs[index - 1];
right_rec = &el->l_recs[index];
- if (ocfs2_is_empty_extent(&el->l_recs[0]))
- has_empty_extent = 1;
+ if (index == 0) {
+ /* we meet with a cross extent block merge. */
+ ret = ocfs2_get_left_path(et, right_path, &left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
- ret = ocfs2_journal_access(handle, inode, bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ left_el = path_leaf_el(left_path);
+ BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
+ le16_to_cpu(left_el->l_count));
+
+ left_rec = &left_el->l_recs[
+ le16_to_cpu(left_el->l_next_free_rec) - 1];
+ BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+ le16_to_cpu(left_rec->e_leaf_clusters) !=
+ le32_to_cpu(split_rec->e_cpos));
+
+ subtree_index = ocfs2_find_subtree_root(et, left_path,
+ right_path);
+
+ ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+ handle->h_buffer_credits,
+ left_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ root_bh = left_path->p_node[subtree_index].bh;
+ BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
+ subtree_index);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ for (i = subtree_index + 1;
+ i < path_num_items(right_path); i++) {
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ right_path, i);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
+ left_path, i);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ } else {
+ left_rec = &el->l_recs[index - 1];
+ if (ocfs2_is_empty_extent(&el->l_recs[0]))
+ has_empty_extent = 1;
+ }
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
+ path_num_items(right_path) - 1);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2790,36 +3606,62 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
*left_rec = *split_rec;
has_empty_extent = 0;
- } else {
+ } else
le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
- }
le32_add_cpu(&right_rec->e_cpos, split_clusters);
le64_add_cpu(&right_rec->e_blkno,
- ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+ ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+ split_clusters));
le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
ocfs2_cleanup_merge(el, index);
- ret = ocfs2_journal_dirty(handle, bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, bh);
+ if (left_path) {
+ ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
+ /*
+ * In the situation that the right_rec is empty and the extent
+ * block is empty also, ocfs2_complete_edge_insert can't handle
+ * it and we need to delete the right extent block.
+ */
+ if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
+ le16_to_cpu(el->l_next_free_rec) == 1) {
+
+ ret = ocfs2_remove_rightmost_path(handle, et,
+ right_path,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Now the rightmost extent block has been deleted.
+ * So we use the new rightmost path.
+ */
+ ocfs2_mv_path(right_path, left_path);
+ left_path = NULL;
+ } else
+ ocfs2_complete_edge_insert(handle, left_path,
+ right_path, subtree_index);
+ }
out:
+ if (left_path)
+ ocfs2_free_path(left_path);
return ret;
}
-static int ocfs2_try_to_merge_extent(struct inode *inode,
- handle_t *handle,
- struct ocfs2_path *left_path,
+static int ocfs2_try_to_merge_extent(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
int split_index,
struct ocfs2_extent_rec *split_rec,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_merge_ctxt *ctxt)
-
{
int ret = 0;
- struct ocfs2_extent_list *el = path_leaf_el(left_path);
+ struct ocfs2_extent_list *el = path_leaf_el(path);
struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
@@ -2832,8 +3674,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* extents - having more than one in a leaf is
* illegal.
*/
- ret = ocfs2_rotate_tree_left(inode, handle, left_path,
- dealloc);
+ ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2847,7 +3688,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* Left-right contig implies this.
*/
BUG_ON(!ctxt->c_split_covers_rec);
- BUG_ON(split_index == 0);
/*
* Since the leftright insert always covers the entire
@@ -2858,9 +3698,13 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* Since the adding of an empty extent shifts
* everything back to the right, there's no need to
* update split_index here.
+ *
+ * When the split_index is zero, we need to merge it to the
+ * prevoius extent block. It is more efficient and easier
+ * if we do merge_right first and merge_left later.
*/
- ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
- handle, split_rec, el, split_index);
+ ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
+ split_index);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2871,33 +3715,28 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
*/
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
- /*
- * The left merge left us with an empty extent, remove
- * it.
- */
- ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
+ /* The merge left us with an empty extent, remove it. */
+ ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
mlog_errno(ret);
goto out;
}
- split_index--;
+
rec = &el->l_recs[split_index];
/*
* Note that we don't pass split_rec here on purpose -
- * we've merged it into the left side.
+ * we've merged it into the rec already.
*/
- ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
- handle, rec, el, split_index);
+ ret = ocfs2_merge_rec_left(path, handle, et, rec,
+ dealloc, split_index);
+
if (ret) {
mlog_errno(ret);
goto out;
}
- BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
-
- ret = ocfs2_rotate_tree_left(inode, handle, left_path,
- dealloc);
+ ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
/*
* Error from this last rotate is not critical, so
* print but don't bubble it up.
@@ -2914,18 +3753,16 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* the record on the left (hence the left merge).
*/
if (ctxt->c_contig_type == CONTIG_RIGHT) {
- ret = ocfs2_merge_rec_left(inode,
- path_leaf_bh(left_path),
- handle, split_rec, el,
+ ret = ocfs2_merge_rec_left(path, handle, et,
+ split_rec, dealloc,
split_index);
if (ret) {
mlog_errno(ret);
goto out;
}
} else {
- ret = ocfs2_merge_rec_right(inode,
- path_leaf_bh(left_path),
- handle, split_rec, el,
+ ret = ocfs2_merge_rec_right(path, handle,
+ et, split_rec,
split_index);
if (ret) {
mlog_errno(ret);
@@ -2938,7 +3775,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* The merge may have left an empty extent in
* our leaf. Try to rotate it away.
*/
- ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+ ret = ocfs2_rotate_tree_left(handle, et, path,
dealloc);
if (ret)
mlog_errno(ret);
@@ -2985,10 +3822,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb,
* list. If this leaf is part of an allocation tree, it is assumed
* that the tree above has been prepared.
*/
-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
+ struct ocfs2_extent_rec *insert_rec,
struct ocfs2_extent_list *el,
- struct ocfs2_insert_type *insert,
- struct inode *inode)
+ struct ocfs2_insert_type *insert)
{
int i = insert->ins_contig_index;
unsigned int range;
@@ -3000,7 +3837,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
BUG_ON(i == -1);
rec = &el->l_recs[i];
- ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
+ ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+ insert->ins_split, rec,
insert_rec);
goto rotate;
}
@@ -3042,10 +3880,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
le16_to_cpu(el->l_count),
- "inode %lu, depth %u, count %u, next free %u, "
+ "owner %llu, depth %u, count %u, next free %u, "
"rec.cpos %u, rec.clusters %u, "
"insert.cpos %u, insert.clusters %u\n",
- inode->i_ino,
+ ocfs2_metadata_cache_owner(et->et_ci),
le16_to_cpu(el->l_tree_depth),
le16_to_cpu(el->l_count),
le16_to_cpu(el->l_next_free_rec),
@@ -3073,18 +3911,8 @@ rotate:
ocfs2_rotate_leaf(el, insert_rec);
}
-static inline void ocfs2_update_dinode_clusters(struct inode *inode,
- struct ocfs2_dinode *di,
- u32 clusters)
-{
- le32_add_cpu(&di->i_clusters, clusters);
- spin_lock(&OCFS2_I(inode)->ip_lock);
- OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
- spin_unlock(&OCFS2_I(inode)->ip_lock);
-}
-
-static void ocfs2_adjust_rightmost_records(struct inode *inode,
- handle_t *handle,
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *path,
struct ocfs2_extent_rec *insert_rec)
{
@@ -3102,9 +3930,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
next_free = le16_to_cpu(el->l_next_free_rec);
if (next_free == 0) {
- ocfs2_error(inode->i_sb,
- "Dinode %llu has a bad extent list",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has a bad extent list",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
ret = -EIO;
return;
}
@@ -3117,14 +3945,12 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
le32_add_cpu(&rec->e_int_clusters,
-le32_to_cpu(rec->e_cpos));
- ret = ocfs2_journal_dirty(handle, bh);
- if (ret)
- mlog_errno(ret);
-
+ ocfs2_journal_dirty(handle, bh);
}
}
-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
+static int ocfs2_append_rec_to_path(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *insert_rec,
struct ocfs2_path *right_path,
struct ocfs2_path **ret_left_path)
@@ -3152,31 +3978,33 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
(next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
u32 left_cpos;
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
- &left_cpos);
+ ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+ right_path, &left_cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
- mlog(0, "Append may need a left path update. cpos: %u, "
- "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos),
- left_cpos);
+ trace_ocfs2_append_rec_to_path(
+ (unsigned long long)
+ ocfs2_metadata_cache_owner(et->et_ci),
+ le32_to_cpu(insert_rec->e_cpos),
+ left_cpos);
/*
* No need to worry if the append is already in the
* leftmost leaf.
*/
if (left_cpos) {
- left_path = ocfs2_new_path(path_root_bh(right_path),
- path_root_el(right_path));
+ left_path = ocfs2_new_path_from_path(right_path);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_path(inode, left_path, left_cpos);
+ ret = ocfs2_find_path(et->et_ci, left_path,
+ left_cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3189,13 +4017,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
}
}
- ret = ocfs2_journal_access_path(inode, handle, right_path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
if (ret) {
mlog_errno(ret);
goto out;
}
- ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec);
+ ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
*ret_left_path = left_path;
ret = 0;
@@ -3206,7 +4034,7 @@ out:
return ret;
}
-static void ocfs2_split_record(struct inode *inode,
+static void ocfs2_split_record(struct ocfs2_extent_tree *et,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
struct ocfs2_extent_rec *split_rec,
@@ -3217,7 +4045,7 @@ static void ocfs2_split_record(struct inode *inode,
struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
struct ocfs2_extent_rec *rec, *tmprec;
- right_el = path_leaf_el(right_path);;
+ right_el = path_leaf_el(right_path);
if (left_path)
left_el = path_leaf_el(left_path);
@@ -3279,20 +4107,21 @@ static void ocfs2_split_record(struct inode *inode,
}
rec = &el->l_recs[index];
- ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
+ ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+ split, rec, split_rec);
ocfs2_rotate_leaf(insert_el, split_rec);
}
/*
- * This function only does inserts on an allocation b-tree. For dinode
- * lists, ocfs2_insert_at_leaf() is called directly.
+ * This function only does inserts on an allocation b-tree. For tree
+ * depth = 0, ocfs2_insert_at_leaf() is called directly.
*
* right_path is the path we want to do the actual insert
* in. left_path should only be passed in if we need to update that
* portion of the tree after an edge insert.
*/
-static int ocfs2_insert_path(struct inode *inode,
- handle_t *handle,
+static int ocfs2_insert_path(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *left_path,
struct ocfs2_path *right_path,
struct ocfs2_extent_rec *insert_rec,
@@ -3302,23 +4131,19 @@ static int ocfs2_insert_path(struct inode *inode,
struct buffer_head *leaf_bh = path_leaf_bh(right_path);
if (left_path) {
- int credits = handle->h_buffer_credits;
-
/*
* There's a chance that left_path got passed back to
* us without being accounted for in the
* journal. Extend our transaction here to be sure we
* can change those blocks.
*/
- credits += left_path->p_tree_depth;
-
- ret = ocfs2_extend_trans(handle, credits);
+ ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access_path(inode, handle, left_path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -3329,7 +4154,7 @@ static int ocfs2_insert_path(struct inode *inode,
* Pass both paths to the journal. The majority of inserts
* will be touching all components anyway.
*/
- ret = ocfs2_journal_access_path(inode, handle, right_path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -3341,7 +4166,7 @@ static int ocfs2_insert_path(struct inode *inode,
* of splits, but it's easier to just let one separate
* function sort it all out.
*/
- ocfs2_split_record(inode, left_path, right_path,
+ ocfs2_split_record(et, left_path, right_path,
insert_rec, insert->ins_split);
/*
@@ -3350,17 +4175,13 @@ static int ocfs2_insert_path(struct inode *inode,
* dirty this for us.
*/
if (left_path)
- ret = ocfs2_journal_dirty(handle,
- path_leaf_bh(left_path));
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle,
+ path_leaf_bh(left_path));
} else
- ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
- insert, inode);
+ ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
+ insert);
- ret = ocfs2_journal_dirty(handle, leaf_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, leaf_bh);
if (left_path) {
/*
@@ -3369,10 +4190,10 @@ static int ocfs2_insert_path(struct inode *inode,
*
* XXX: Should we extend the transaction here?
*/
- subtree_index = ocfs2_find_subtree_root(inode, left_path,
+ subtree_index = ocfs2_find_subtree_root(et, left_path,
right_path);
- ocfs2_complete_edge_insert(inode, handle, left_path,
- right_path, subtree_index);
+ ocfs2_complete_edge_insert(handle, left_path, right_path,
+ subtree_index);
}
ret = 0;
@@ -3380,9 +4201,8 @@ out:
return ret;
}
-static int ocfs2_do_insert_extent(struct inode *inode,
- handle_t *handle,
- struct buffer_head *di_bh,
+static int ocfs2_do_insert_extent(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *insert_rec,
struct ocfs2_insert_type *type)
{
@@ -3390,25 +4210,23 @@ static int ocfs2_do_insert_extent(struct inode *inode,
u32 cpos;
struct ocfs2_path *right_path = NULL;
struct ocfs2_path *left_path = NULL;
- struct ocfs2_dinode *di;
struct ocfs2_extent_list *el;
- di = (struct ocfs2_dinode *) di_bh->b_data;
- el = &di->id2.i_list;
+ el = et->et_root_el;
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
}
if (le16_to_cpu(el->l_tree_depth) == 0) {
- ocfs2_insert_at_leaf(insert_rec, el, type, inode);
+ ocfs2_insert_at_leaf(et, insert_rec, el, type);
goto out_update_clusters;
}
- right_path = ocfs2_new_inode_path(di_bh);
+ right_path = ocfs2_new_path_from_et(et);
if (!right_path) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -3427,7 +4245,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
cpos = UINT_MAX;
}
- ret = ocfs2_find_path(inode, right_path, cpos);
+ ret = ocfs2_find_path(et->et_ci, right_path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3446,7 +4264,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
* can wind up skipping both of these two special cases...
*/
if (rotate) {
- ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
+ ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
le32_to_cpu(insert_rec->e_cpos),
right_path, &left_path);
if (ret) {
@@ -3458,15 +4276,15 @@ static int ocfs2_do_insert_extent(struct inode *inode,
* ocfs2_rotate_tree_right() might have extended the
* transaction without re-journaling our tree root.
*/
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
}
} else if (type->ins_appending == APPEND_TAIL
&& type->ins_contig != CONTIG_LEFT) {
- ret = ocfs2_append_rec_to_path(inode, handle, insert_rec,
+ ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
right_path, &left_path);
if (ret) {
mlog_errno(ret);
@@ -3474,7 +4292,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
}
}
- ret = ocfs2_insert_path(inode, handle, left_path, right_path,
+ ret = ocfs2_insert_path(handle, et, left_path, right_path,
insert_rec, type);
if (ret) {
mlog_errno(ret);
@@ -3483,12 +4301,10 @@ static int ocfs2_do_insert_extent(struct inode *inode,
out_update_clusters:
if (type->ins_split == SPLIT_NONE)
- ocfs2_update_dinode_clusters(inode, di,
- le16_to_cpu(insert_rec->e_leaf_clusters));
+ ocfs2_et_update_clusters(et,
+ le16_to_cpu(insert_rec->e_leaf_clusters));
- ret = ocfs2_journal_dirty(handle, di_bh);
- if (ret)
- mlog_errno(ret);
+ ocfs2_journal_dirty(handle, et->et_root_bh);
out:
ocfs2_free_path(left_path);
@@ -3498,33 +4314,115 @@ out:
}
static enum ocfs2_contig_type
-ocfs2_figure_merge_contig_type(struct inode *inode,
+ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
struct ocfs2_extent_list *el, int index,
struct ocfs2_extent_rec *split_rec)
{
- struct ocfs2_extent_rec *rec;
+ int status;
enum ocfs2_contig_type ret = CONTIG_NONE;
+ u32 left_cpos, right_cpos;
+ struct ocfs2_extent_rec *rec = NULL;
+ struct ocfs2_extent_list *new_el;
+ struct ocfs2_path *left_path = NULL, *right_path = NULL;
+ struct buffer_head *bh;
+ struct ocfs2_extent_block *eb;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+
+ if (index > 0) {
+ rec = &el->l_recs[index - 1];
+ } else if (path->p_tree_depth > 0) {
+ status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
+ if (status)
+ goto out;
+
+ if (left_cpos != 0) {
+ left_path = ocfs2_new_path_from_path(path);
+ if (!left_path)
+ goto out;
+
+ status = ocfs2_find_path(et->et_ci, left_path,
+ left_cpos);
+ if (status)
+ goto out;
+
+ new_el = path_leaf_el(left_path);
+
+ if (le16_to_cpu(new_el->l_next_free_rec) !=
+ le16_to_cpu(new_el->l_count)) {
+ bh = path_leaf_bh(left_path);
+ eb = (struct ocfs2_extent_block *)bh->b_data;
+ ocfs2_error(sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of "
+ "%d. It should have "
+ "matched the l_count of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec),
+ le16_to_cpu(new_el->l_count));
+ status = -EINVAL;
+ goto out;
+ }
+ rec = &new_el->l_recs[
+ le16_to_cpu(new_el->l_next_free_rec) - 1];
+ }
+ }
/*
* We're careful to check for an empty extent record here -
* the merge code will know what to do if it sees one.
*/
-
- if (index > 0) {
- rec = &el->l_recs[index - 1];
+ if (rec) {
if (index == 1 && ocfs2_is_empty_extent(rec)) {
if (split_rec->e_cpos == el->l_recs[index].e_cpos)
ret = CONTIG_RIGHT;
} else {
- ret = ocfs2_extent_contig(inode, rec, split_rec);
+ ret = ocfs2_et_extent_contig(et, rec, split_rec);
}
}
- if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
+ rec = NULL;
+ if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
+ rec = &el->l_recs[index + 1];
+ else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
+ path->p_tree_depth > 0) {
+ status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
+ if (status)
+ goto out;
+
+ if (right_cpos == 0)
+ goto out;
+
+ right_path = ocfs2_new_path_from_path(path);
+ if (!right_path)
+ goto out;
+
+ status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
+ if (status)
+ goto out;
+
+ new_el = path_leaf_el(right_path);
+ rec = &new_el->l_recs[0];
+ if (ocfs2_is_empty_extent(rec)) {
+ if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
+ bh = path_leaf_bh(right_path);
+ eb = (struct ocfs2_extent_block *)bh->b_data;
+ ocfs2_error(sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec));
+ status = -EINVAL;
+ goto out;
+ }
+ rec = &new_el->l_recs[1];
+ }
+ }
+
+ if (rec) {
enum ocfs2_contig_type contig_type;
- rec = &el->l_recs[index + 1];
- contig_type = ocfs2_extent_contig(inode, rec, split_rec);
+ contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
ret = CONTIG_LEFTRIGHT;
@@ -3532,10 +4430,16 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
ret = contig_type;
}
+out:
+ if (left_path)
+ ocfs2_free_path(left_path);
+ if (right_path)
+ ocfs2_free_path(right_path);
+
return ret;
}
-static void ocfs2_figure_contig_type(struct inode *inode,
+static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
struct ocfs2_insert_type *insert,
struct ocfs2_extent_list *el,
struct ocfs2_extent_rec *insert_rec)
@@ -3546,14 +4450,29 @@ static void ocfs2_figure_contig_type(struct inode *inode,
BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
- contig_type = ocfs2_extent_contig(inode, &el->l_recs[i],
- insert_rec);
+ contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
+ insert_rec);
if (contig_type != CONTIG_NONE) {
insert->ins_contig_index = i;
break;
}
}
insert->ins_contig = contig_type;
+
+ if (insert->ins_contig != CONTIG_NONE) {
+ struct ocfs2_extent_rec *rec =
+ &el->l_recs[insert->ins_contig_index];
+ unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
+ le16_to_cpu(insert_rec->e_leaf_clusters);
+
+ /*
+ * Caller might want us to limit the size of extents, don't
+ * calculate contiguousness if we might exceed that limit.
+ */
+ if (et->et_max_leaf_clusters &&
+ (len > et->et_max_leaf_clusters))
+ insert->ins_contig = CONTIG_NONE;
+ }
}
/*
@@ -3562,8 +4481,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
* ocfs2_figure_appending_type() will figure out whether we'll have to
* insert at the tail of the rightmost leaf.
*
- * This should also work against the dinode list for tree's with 0
- * depth. If we consider the dinode list to be the rightmost leaf node
+ * This should also work against the root extent list for tree's with 0
+ * depth. If we consider the root extent list to be the rightmost leaf node
* then the logic here makes sense.
*/
static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
@@ -3601,7 +4520,7 @@ set_tail_append:
}
/*
- * Helper function called at the begining of an insert.
+ * Helper function called at the beginning of an insert.
*
* This computes a few things that are commonly used in the process of
* inserting into the btree:
@@ -3613,15 +4532,13 @@ set_tail_append:
* All of the information is stored on the ocfs2_insert_type
* structure.
*/
-static int ocfs2_figure_insert_type(struct inode *inode,
- struct buffer_head *di_bh,
+static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
struct buffer_head **last_eb_bh,
struct ocfs2_extent_rec *insert_rec,
int *free_records,
struct ocfs2_insert_type *insert)
{
int ret;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct ocfs2_path *path = NULL;
@@ -3629,7 +4546,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
insert->ins_split = SPLIT_NONE;
- el = &di->id2.i_list;
+ el = et->et_root_el;
insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
if (el->l_tree_depth) {
@@ -3639,11 +4556,11 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* ocfs2_figure_insert_type() and ocfs2_add_branch()
* may want it later.
*/
- ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
- le64_to_cpu(di->i_last_eb_blk), &bh,
- OCFS2_BH_CACHED, inode);
+ ret = ocfs2_read_extent_block(et->et_ci,
+ ocfs2_et_get_last_eb_blk(et),
+ &bh);
if (ret) {
- mlog_exit(ret);
+ mlog_errno(ret);
goto out;
}
eb = (struct ocfs2_extent_block *) bh->b_data;
@@ -3662,12 +4579,12 @@ static int ocfs2_figure_insert_type(struct inode *inode,
le16_to_cpu(el->l_next_free_rec);
if (!insert->ins_tree_depth) {
- ocfs2_figure_contig_type(inode, insert, el, insert_rec);
+ ocfs2_figure_contig_type(et, insert, el, insert_rec);
ocfs2_figure_appending_type(insert, el, insert_rec);
return 0;
}
- path = ocfs2_new_inode_path(di_bh);
+ path = ocfs2_new_path_from_et(et);
if (!path) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -3680,7 +4597,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* us the rightmost tree path. This is accounted for below in
* the appending code.
*/
- ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos));
+ ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
if (ret) {
mlog_errno(ret);
goto out;
@@ -3696,7 +4613,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* into two types of appends: simple record append, or a
* rotate inside the tail leaf.
*/
- ocfs2_figure_contig_type(inode, insert, el, insert_rec);
+ ocfs2_figure_contig_type(et, insert, el, insert_rec);
/*
* The insert code isn't quite ready to deal with all cases of
@@ -3717,7 +4634,8 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* the case that we're doing a tail append, so maybe we can
* take advantage of that information somehow.
*/
- if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) {
+ if (ocfs2_et_get_last_eb_blk(et) ==
+ path_leaf_bh(path)->b_blocknr) {
/*
* Ok, ocfs2_find_path() returned us the rightmost
* tree path. This might be an appending insert. There are
@@ -3740,14 +4658,12 @@ out:
}
/*
- * Insert an extent into an inode btree.
+ * Insert an extent into a btree.
*
- * The caller needs to update fe->i_clusters
+ * The caller needs to update the owning btree's cluster count.
*/
-int ocfs2_insert_extent(struct ocfs2_super *osb,
- handle_t *handle,
- struct inode *inode,
- struct buffer_head *fe_bh,
+int ocfs2_insert_extent(handle_t *handle,
+ struct ocfs2_extent_tree *et,
u32 cpos,
u64 start_blk,
u32 new_clusters,
@@ -3760,40 +4676,34 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
- BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
-
- mlog(0, "add %u clusters at position %u to inode %llu\n",
- new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
-
- mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
- (OCFS2_I(inode)->ip_clusters != cpos),
- "Device %s, asking for sparse allocation: inode %llu, "
- "cpos %u, clusters %u\n",
- osb->dev_str,
- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
- OCFS2_I(inode)->ip_clusters);
+ trace_ocfs2_insert_extent_start(
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos, new_clusters);
memset(&rec, 0, sizeof(rec));
rec.e_cpos = cpu_to_le32(cpos);
rec.e_blkno = cpu_to_le64(start_blk);
rec.e_leaf_clusters = cpu_to_le16(new_clusters);
rec.e_flags = flags;
+ status = ocfs2_et_insert_check(et, &rec);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
- status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
+ status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
&free_records, &insert);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- mlog(0, "Insert.appending: %u, Insert.Contig: %u, "
- "Insert.contig_index: %d, Insert.free_records: %d, "
- "Insert.tree_depth: %d\n",
- insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
- free_records, insert.ins_tree_depth);
+ trace_ocfs2_insert_extent(insert.ins_appending, insert.ins_contig,
+ insert.ins_contig_index, free_records,
+ insert.ins_tree_depth);
if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
- status = ocfs2_grow_tree(inode, handle, fe_bh,
+ status = ocfs2_grow_tree(handle, et,
&insert.ins_tree_depth, &last_eb_bh,
meta_ac);
if (status) {
@@ -3803,17 +4713,134 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
}
/* Finally, we can add clusters. This might rotate the tree for us. */
- status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
+ status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
if (status < 0)
mlog_errno(status);
else
- ocfs2_extent_map_insert_rec(inode, &rec);
+ ocfs2_et_extent_map_insert(et, &rec);
bail:
- if (last_eb_bh)
- brelse(last_eb_bh);
+ brelse(last_eb_bh);
- mlog_exit(status);
+ return status;
+}
+
+/*
+ * Allcate and add clusters into the extent b-tree.
+ * The new clusters(clusters_to_add) will be inserted at logical_offset.
+ * The extent b-tree's root is specified by et, and
+ * it is not limited to the file storage. Any extent tree can use this
+ * function if it implements the proper ocfs2_extent_tree.
+ */
+int ocfs2_add_clusters_in_btree(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ u32 *logical_offset,
+ u32 clusters_to_add,
+ int mark_unwritten,
+ struct ocfs2_alloc_context *data_ac,
+ struct ocfs2_alloc_context *meta_ac,
+ enum ocfs2_alloc_restarted *reason_ret)
+{
+ int status = 0, err = 0;
+ int need_free = 0;
+ int free_extents;
+ enum ocfs2_alloc_restarted reason = RESTART_NONE;
+ u32 bit_off, num_bits;
+ u64 block;
+ u8 flags = 0;
+ struct ocfs2_super *osb =
+ OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
+
+ BUG_ON(!clusters_to_add);
+
+ if (mark_unwritten)
+ flags = OCFS2_EXT_UNWRITTEN;
+
+ free_extents = ocfs2_num_free_extents(osb, et);
+ if (free_extents < 0) {
+ status = free_extents;
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* there are two cases which could cause us to EAGAIN in the
+ * we-need-more-metadata case:
+ * 1) we haven't reserved *any*
+ * 2) we are so fragmented, we've needed to add metadata too
+ * many times. */
+ if (!free_extents && !meta_ac) {
+ err = -1;
+ status = -EAGAIN;
+ reason = RESTART_META;
+ goto leave;
+ } else if ((!free_extents)
+ && (ocfs2_alloc_context_bits_left(meta_ac)
+ < ocfs2_extend_meta_needed(et->et_root_el))) {
+ err = -2;
+ status = -EAGAIN;
+ reason = RESTART_META;
+ goto leave;
+ }
+
+ status = __ocfs2_claim_clusters(handle, data_ac, 1,
+ clusters_to_add, &bit_off, &num_bits);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto leave;
+ }
+
+ BUG_ON(num_bits > clusters_to_add);
+
+ /* reserve our write early -- insert_extent may update the tree root */
+ status = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ need_free = 1;
+ goto bail;
+ }
+
+ block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
+ trace_ocfs2_add_clusters_in_btree(
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ bit_off, num_bits);
+ status = ocfs2_insert_extent(handle, et, *logical_offset, block,
+ num_bits, flags, meta_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ need_free = 1;
+ goto bail;
+ }
+
+ ocfs2_journal_dirty(handle, et->et_root_bh);
+
+ clusters_to_add -= num_bits;
+ *logical_offset += num_bits;
+
+ if (clusters_to_add) {
+ err = clusters_to_add;
+ status = -EAGAIN;
+ reason = RESTART_TRANS;
+ }
+
+bail:
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num_bits);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num_bits);
+ }
+
+leave:
+ if (reason_ret)
+ *reason_ret = reason;
+ trace_ocfs2_add_clusters_in_btree_ret(status, reason, err);
return status;
}
@@ -3837,10 +4864,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
split_rec->e_flags = rec->e_flags;
}
-static int ocfs2_split_and_insert(struct inode *inode,
- handle_t *handle,
+static int ocfs2_split_and_insert(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *path,
- struct buffer_head *di_bh,
struct buffer_head **last_eb_bh,
int split_index,
struct ocfs2_extent_rec *orig_split_rec,
@@ -3854,7 +4880,6 @@ static int ocfs2_split_and_insert(struct inode *inode,
struct ocfs2_extent_rec split_rec = *orig_split_rec;
struct ocfs2_insert_type insert;
struct ocfs2_extent_block *eb;
- struct ocfs2_dinode *di;
leftright:
/*
@@ -3863,8 +4888,7 @@ leftright:
*/
rec = path_leaf_el(path)->l_recs[split_index];
- di = (struct ocfs2_dinode *)di_bh->b_data;
- rightmost_el = &di->id2.i_list;
+ rightmost_el = et->et_root_el;
depth = le16_to_cpu(rightmost_el->l_tree_depth);
if (depth) {
@@ -3875,8 +4899,8 @@ leftright:
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
- ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
- meta_ac);
+ ret = ocfs2_grow_tree(handle, et,
+ &depth, last_eb_bh, meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3904,8 +4928,8 @@ leftright:
*/
insert.ins_split = SPLIT_RIGHT;
- ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range,
- &rec);
+ ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+ &tmprec, insert_range, &rec);
split_rec = tmprec;
@@ -3913,8 +4937,7 @@ leftright:
do_leftright = 1;
}
- ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
- &insert);
+ ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3930,7 +4953,7 @@ leftright:
ocfs2_reinit_path(path, 1);
cpos = le32_to_cpu(split_rec.e_cpos);
- ret = ocfs2_find_path(inode, path, cpos);
+ ret = ocfs2_find_path(et->et_ci, path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3945,10 +4968,32 @@ out:
return ret;
}
+static int ocfs2_replace_extent_rec(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
+ struct ocfs2_extent_list *el,
+ int split_index,
+ struct ocfs2_extent_rec *split_rec)
+{
+ int ret;
+
+ ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
+ path_num_items(path) - 1);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ el->l_recs[split_index] = *split_rec;
+
+ ocfs2_journal_dirty(handle, path_leaf_bh(path));
+out:
+ return ret;
+}
+
/*
- * Mark part or all of the extent record at split_index in the leaf
- * pointed to by path as written. This removes the unwritten
- * extent flag.
+ * Split part or all of the extent record at split_index in the leaf
+ * pointed to by path. Merge with the contiguous extent record if needed.
*
* Care is taken to handle contiguousness so as to not grow the tree.
*
@@ -3956,22 +5001,22 @@ out:
* of the tree is required. All other cases will degrade into a less
* optimal tree layout.
*
- * last_eb_bh should be the rightmost leaf block for any inode with a
- * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call.
+ * last_eb_bh should be the rightmost leaf block for any extent
+ * btree. Since a split may grow the tree or a merge might shrink it,
+ * the caller cannot trust the contents of that buffer after this call.
*
* This code is optimized for readability - several passes might be
* made over certain portions of the tree. All of those blocks will
* have been brought into cache (and pinned via the journal), so the
* extra overhead is not expressed in terms of disk reads.
*/
-static int __ocfs2_mark_extent_written(struct inode *inode,
- struct buffer_head *di_bh,
- handle_t *handle,
- struct ocfs2_path *path,
- int split_index,
- struct ocfs2_extent_rec *split_rec,
- struct ocfs2_alloc_context *meta_ac,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_split_extent(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
+ int split_index,
+ struct ocfs2_extent_rec *split_rec,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
{
int ret = 0;
struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -3980,12 +5025,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
struct ocfs2_merge_ctxt ctxt;
struct ocfs2_extent_list *rightmost_el;
- if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
- ret = -EIO;
- mlog_errno(ret);
- goto out;
- }
-
if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
(le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
@@ -3994,34 +5033,27 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
goto out;
}
- ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
+ ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
split_index,
split_rec);
/*
* The core merge / split code wants to know how much room is
- * left in this inodes allocation tree, so we pass the
+ * left in this allocation tree, so we pass the
* rightmost extent list.
*/
if (path->p_tree_depth) {
struct ocfs2_extent_block *eb;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
- le64_to_cpu(di->i_last_eb_blk),
- &last_eb_bh, OCFS2_BH_CACHED, inode);
+ ret = ocfs2_read_extent_block(et->et_ci,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret) {
- mlog_exit(ret);
+ mlog_errno(ret);
goto out;
}
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
-
rightmost_el = &eb->h_list;
} else
rightmost_el = path_root_el(path);
@@ -4034,21 +5066,22 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
- mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n",
- split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent,
- ctxt.c_split_covers_rec);
+ trace_ocfs2_split_extent(split_index, ctxt.c_contig_type,
+ ctxt.c_has_empty_extent,
+ ctxt.c_split_covers_rec);
if (ctxt.c_contig_type == CONTIG_NONE) {
if (ctxt.c_split_covers_rec)
- el->l_recs[split_index] = *split_rec;
+ ret = ocfs2_replace_extent_rec(handle, et, path, el,
+ split_index, split_rec);
else
- ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
+ ret = ocfs2_split_and_insert(handle, et, path,
&last_eb_bh, split_index,
split_rec, meta_ac);
if (ret)
mlog_errno(ret);
} else {
- ret = ocfs2_try_to_merge_extent(inode, handle, path,
+ ret = ocfs2_try_to_merge_extent(handle, et, path,
split_index, split_rec,
dealloc, &ctxt);
if (ret)
@@ -4061,50 +5094,40 @@ out:
}
/*
- * Mark the already-existing extent at cpos as written for len clusters.
+ * Change the flags of the already-existing extent at cpos for len clusters.
+ *
+ * new_flags: the flags we want to set.
+ * clear_flags: the flags we want to clear.
+ * phys: the new physical offset we want this new extent starts from.
*
* If the existing extent is larger than the request, initiate a
* split. An attempt will be made at merging with adjacent extents.
*
* The caller is responsible for passing down meta_ac if we'll need it.
*/
-int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
- handle_t *handle, u32 cpos, u32 len, u32 phys,
- struct ocfs2_alloc_context *meta_ac,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_change_extent_flag(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 len, u32 phys,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_cached_dealloc_ctxt *dealloc,
+ int new_flags, int clear_flags)
{
int ret, index;
- u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+ u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
struct ocfs2_extent_rec split_rec;
struct ocfs2_path *left_path = NULL;
struct ocfs2_extent_list *el;
+ struct ocfs2_extent_rec *rec;
- mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
- inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
-
- if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
- ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
- "that are being written to, but the feature bit "
- "is not set in the super block.",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- ret = -EROFS;
- goto out;
- }
-
- /*
- * XXX: This should be fixed up so that we just re-insert the
- * next extent records.
- */
- ocfs2_extent_map_trunc(inode, 0);
-
- left_path = ocfs2_new_inode_path(di_bh);
+ left_path = ocfs2_new_path_from_et(et);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_path(inode, left_path, cpos);
+ ret = ocfs2_find_path(et->et_ci, left_path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4113,38 +5136,107 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
index = ocfs2_search_extent_list(el, cpos);
if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has an extent at cpos %u which can no "
+ ocfs2_error(sb,
+ "Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+ (unsigned long long)
+ ocfs2_metadata_cache_owner(et->et_ci), cpos);
ret = -EROFS;
goto out;
}
+ ret = -EIO;
+ rec = &el->l_recs[index];
+ if (new_flags && (rec->e_flags & new_flags)) {
+ mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
+ "extent that already had them",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ new_flags);
+ goto out;
+ }
+
+ if (clear_flags && !(rec->e_flags & clear_flags)) {
+ mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
+ "extent that didn't have them",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ clear_flags);
+ goto out;
+ }
+
memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
split_rec.e_cpos = cpu_to_le32(cpos);
split_rec.e_leaf_clusters = cpu_to_le16(len);
split_rec.e_blkno = cpu_to_le64(start_blkno);
- split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
- split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
-
- ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
- index, &split_rec, meta_ac, dealloc);
+ split_rec.e_flags = rec->e_flags;
+ if (new_flags)
+ split_rec.e_flags |= new_flags;
+ if (clear_flags)
+ split_rec.e_flags &= ~clear_flags;
+
+ ret = ocfs2_split_extent(handle, et, left_path,
+ index, &split_rec, meta_ac,
+ dealloc);
if (ret)
mlog_errno(ret);
out:
ocfs2_free_path(left_path);
return ret;
+
}
-static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
- handle_t *handle, struct ocfs2_path *path,
+/*
+ * Mark the already-existing extent at cpos as written for len clusters.
+ * This removes the unwritten extent flag.
+ *
+ * If the existing extent is larger than the request, initiate a
+ * split. An attempt will be made at merging with adjacent extents.
+ *
+ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+int ocfs2_mark_extent_written(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ handle_t *handle, u32 cpos, u32 len, u32 phys,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ int ret;
+
+ trace_ocfs2_mark_extent_written(
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ cpos, len, phys);
+
+ if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
+ ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
+ "that are being written to, but the feature bit "
+ "is not set in the super block.",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ ret = -EROFS;
+ goto out;
+ }
+
+ /*
+ * XXX: This should be fixed up so that we just re-insert the
+ * next extent records.
+ */
+ ocfs2_et_extent_map_truncate(et, 0);
+
+ ret = ocfs2_change_extent_flag(handle, et, cpos,
+ len, phys, meta_ac, dealloc,
+ 0, OCFS2_EXT_UNWRITTEN);
+ if (ret)
+ mlog_errno(ret);
+
+out:
+ return ret;
+}
+
+static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+ struct ocfs2_path *path,
int index, u32 new_range,
struct ocfs2_alloc_context *meta_ac)
{
- int ret, depth, credits = handle->h_buffer_credits;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ int ret, depth, credits;
struct buffer_head *last_eb_bh = NULL;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *rightmost_el, *el;
@@ -4157,13 +5249,14 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
*/
el = path_leaf_el(path);
rec = &el->l_recs[index];
- ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec);
+ ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+ &split_rec, new_range, rec);
depth = path->p_tree_depth;
if (depth > 0) {
- ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
- le64_to_cpu(di->i_last_eb_blk),
- &last_eb_bh, OCFS2_BH_CACHED, inode);
+ ret = ocfs2_read_extent_block(et->et_ci,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -4174,7 +5267,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
} else
rightmost_el = path_leaf_el(path);
- credits += path->p_tree_depth + ocfs2_extend_meta_needed(di);
+ credits = path->p_tree_depth +
+ ocfs2_extend_meta_needed(et->et_root_el);
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
@@ -4183,7 +5277,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
- ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
+ ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
meta_ac);
if (ret) {
mlog_errno(ret);
@@ -4197,7 +5291,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
insert.ins_split = SPLIT_RIGHT;
insert.ins_tree_depth = depth;
- ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
+ ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
if (ret)
mlog_errno(ret);
@@ -4206,7 +5300,8 @@ out:
return ret;
}
-static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
+static int ocfs2_truncate_rec(handle_t *handle,
+ struct ocfs2_extent_tree *et,
struct ocfs2_path *path, int index,
struct ocfs2_cached_dealloc_ctxt *dealloc,
u32 cpos, u32 len)
@@ -4214,14 +5309,14 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
int ret;
u32 left_cpos, rec_range, trunc_range;
int wants_rotate = 0, is_rightmost_tree_rec = 0;
- struct super_block *sb = inode->i_sb;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
struct ocfs2_path *left_path = NULL;
struct ocfs2_extent_list *el = path_leaf_el(path);
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_block *eb;
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
- ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
+ ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4253,29 +5348,28 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
* by this leaf and the one to it's left.
*
* There are two cases we can skip:
- * 1) Path is the leftmost one in our inode tree.
+ * 1) Path is the leftmost one in our btree.
* 2) The leaf is rightmost and will be empty after
* we remove the extent record - the rotate code
* knows how to update the newly formed edge.
*/
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path,
- &left_cpos);
+ ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
if (ret) {
mlog_errno(ret);
goto out;
}
if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
- left_path = ocfs2_new_path(path_root_bh(path),
- path_root_el(path));
+ left_path = ocfs2_new_path_from_path(path);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_path(inode, left_path, left_cpos);
+ ret = ocfs2_find_path(et->et_ci, left_path,
+ left_cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4291,13 +5385,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
goto out;
}
- ret = ocfs2_journal_access_path(inode, handle, path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, path);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access_path(inode, handle, left_path);
+ ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4320,7 +5414,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
* be deleted by the rotate code.
*/
rec = &el->l_recs[next_free - 1];
- ocfs2_adjust_rightmost_records(inode, handle, path,
+ ocfs2_adjust_rightmost_records(handle, et, path,
rec);
}
} else if (le32_to_cpu(rec->e_cpos) == cpos) {
@@ -4332,11 +5426,12 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
/* Remove rightmost portion of the record */
le16_add_cpu(&rec->e_leaf_clusters, -len);
if (is_rightmost_tree_rec)
- ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+ ocfs2_adjust_rightmost_records(handle, et, path, rec);
} else {
/* Caller should have trapped this. */
- mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
- "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
+ "(%u, %u)\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
le32_to_cpu(rec->e_cpos),
le16_to_cpu(rec->e_leaf_clusters), cpos, len);
BUG();
@@ -4345,14 +5440,14 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
if (left_path) {
int subtree_index;
- subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
- ocfs2_complete_edge_insert(inode, handle, left_path, path,
+ subtree_index = ocfs2_find_subtree_root(et, left_path, path);
+ ocfs2_complete_edge_insert(handle, left_path, path,
subtree_index);
}
ocfs2_journal_dirty(handle, path_leaf_bh(path));
- ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
+ ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4363,8 +5458,9 @@ out:
return ret;
}
-int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
- u32 cpos, u32 len, handle_t *handle,
+int ocfs2_remove_extent(handle_t *handle,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 len,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
@@ -4372,18 +5468,22 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
u32 rec_range, trunc_range;
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_list *el;
- struct ocfs2_path *path;
+ struct ocfs2_path *path = NULL;
- ocfs2_extent_map_trunc(inode, 0);
+ /*
+ * XXX: Why are we truncating to 0 instead of wherever this
+ * affects us?
+ */
+ ocfs2_et_extent_map_truncate(et, 0);
- path = ocfs2_new_inode_path(di_bh);
+ path = ocfs2_new_path_from_et(et);
if (!path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_find_path(inode, path, cpos);
+ ret = ocfs2_find_path(et->et_ci, path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4392,10 +5492,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has an extent at cpos %u which can no "
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos);
ret = -EROFS;
goto out;
}
@@ -4422,20 +5523,20 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
- mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d "
- "(cpos %u, len %u)\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index,
- le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
+ trace_ocfs2_remove_extent(
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos, len, index, le32_to_cpu(rec->e_cpos),
+ ocfs2_rec_clusters(el, rec));
if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
- ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
+ ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
cpos, len);
if (ret) {
mlog_errno(ret);
goto out;
}
} else {
- ret = ocfs2_split_tree(inode, di_bh, handle, path, index,
+ ret = ocfs2_split_tree(handle, et, path, index,
trunc_range, meta_ac);
if (ret) {
mlog_errno(ret);
@@ -4448,7 +5549,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
*/
ocfs2_reinit_path(path, 1);
- ret = ocfs2_find_path(inode, path, cpos);
+ ret = ocfs2_find_path(et->et_ci, path, cpos);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4457,9 +5558,9 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
- ocfs2_error(inode->i_sb,
- "Inode %llu: split at cpos %u lost record.",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu: split at cpos %u lost record.",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
cpos);
ret = -EROFS;
goto out;
@@ -4473,17 +5574,17 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
rec_range = le32_to_cpu(rec->e_cpos) +
ocfs2_rec_clusters(el, rec);
if (rec_range != trunc_range) {
- ocfs2_error(inode->i_sb,
- "Inode %llu: error after split at cpos %u"
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu: error after split at cpos %u"
"trunc len %u, existing record is (%u,%u)",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
cpos, len, le32_to_cpu(rec->e_cpos),
ocfs2_rec_clusters(el, rec));
ret = -EROFS;
goto out;
}
- ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
+ ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
cpos, len);
if (ret) {
mlog_errno(ret);
@@ -4496,6 +5597,170 @@ out:
return ret;
}
+/*
+ * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
+ * same as ocfs2_lock_alloctors(), except for it accepts a blocks
+ * number to reserve some extra blocks, and it only handles meta
+ * data allocations.
+ *
+ * Currently, only ocfs2_remove_btree_range() uses it for truncating
+ * and punching holes.
+ */
+static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 extents_to_split,
+ struct ocfs2_alloc_context **ac,
+ int extra_blocks)
+{
+ int ret = 0, num_free_extents;
+ unsigned int max_recs_needed = 2 * extents_to_split;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ *ac = NULL;
+
+ num_free_extents = ocfs2_num_free_extents(osb, et);
+ if (num_free_extents < 0) {
+ ret = num_free_extents;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (!num_free_extents ||
+ (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
+ extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
+
+ if (extra_blocks) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+out:
+ if (ret) {
+ if (*ac) {
+ ocfs2_free_alloc_context(*ac);
+ *ac = NULL;
+ }
+ }
+
+ return ret;
+}
+
+int ocfs2_remove_btree_range(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 phys_cpos, u32 len, int flags,
+ struct ocfs2_cached_dealloc_ctxt *dealloc,
+ u64 refcount_loc)
+{
+ int ret, credits = 0, extra_blocks = 0;
+ u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ struct ocfs2_alloc_context *meta_ac = NULL;
+ struct ocfs2_refcount_tree *ref_tree = NULL;
+
+ if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
+ BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
+ OCFS2_HAS_REFCOUNT_FL));
+
+ ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ ret = ocfs2_prepare_refcount_change_for_del(inode,
+ refcount_loc,
+ phys_blkno,
+ len,
+ &credits,
+ &extra_blocks);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto bail;
+ }
+ }
+
+ ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
+ extra_blocks);
+ if (ret) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb,
+ ocfs2_remove_extent_credits(osb->sb) + credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_et_root_journal_access(handle, et,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ dquot_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(inode->i_sb, len));
+
+ ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ocfs2_et_update_clusters(et, -len);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
+
+ ocfs2_journal_dirty(handle, et->et_root_bh);
+
+ if (phys_blkno) {
+ if (flags & OCFS2_EXT_REFCOUNTED)
+ ret = ocfs2_decrease_refcount(inode, handle,
+ ocfs2_blocks_to_clusters(osb->sb,
+ phys_blkno),
+ len, meta_ac,
+ dealloc, 1);
+ else
+ ret = ocfs2_truncate_log_append(osb, handle,
+ phys_blkno, len);
+ if (ret)
+ mlog_errno(ret);
+
+ }
+
+out_commit:
+ ocfs2_commit_trans(osb, handle);
+out:
+ mutex_unlock(&tl_inode->i_mutex);
+bail:
+ if (meta_ac)
+ ocfs2_free_alloc_context(meta_ac);
+
+ if (ref_tree)
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+
+ return ret;
+}
+
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
{
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -4541,21 +5806,18 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- mlog_entry("start_blk = %llu, num_clusters = %u\n",
- (unsigned long long)start_blk, num_clusters);
-
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
tl_count = le16_to_cpu(tl->tl_count);
mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
tl_count == 0,
@@ -4573,17 +5835,16 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
goto bail;
}
- status = ocfs2_journal_access(handle, tl_inode, tl_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- mlog(0, "Log truncate of %u clusters starting at cluster %u to "
- "%llu (index = %d)\n", num_clusters, start_cluster,
- (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
-
+ trace_ocfs2_truncate_log_append(
+ (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index,
+ start_cluster, num_clusters);
if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
/*
* Move index back to the record we are coalescing with.
@@ -4592,23 +5853,20 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
index--;
num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
- mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
- index, le32_to_cpu(tl->tl_recs[index].t_start),
- num_clusters);
+ trace_ocfs2_truncate_log_append(
+ (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
+ index, le32_to_cpu(tl->tl_recs[index].t_start),
+ num_clusters);
} else {
tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
tl->tl_used = cpu_to_le16(index + 1);
}
tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
- status = ocfs2_journal_dirty(handle, tl_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_journal_dirty(handle, tl_bh);
+ osb->truncated_clusters += num_clusters;
bail:
- mlog_exit(status);
return status;
}
@@ -4627,16 +5885,14 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
struct inode *tl_inode = osb->osb_tl_inode;
struct buffer_head *tl_bh = osb->osb_tl_bh;
- mlog_entry_void();
-
di = (struct ocfs2_dinode *) tl_bh->b_data;
tl = &di->id2.i_dealloc;
i = le16_to_cpu(tl->tl_used) - 1;
while (i >= 0) {
/* Caller has given us at least enough credits to
* update the truncate log dinode */
- status = ocfs2_journal_access(handle, tl_inode, tl_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -4644,11 +5900,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
tl->tl_used = cpu_to_le16(i);
- status = ocfs2_journal_dirty(handle, tl_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_journal_dirty(handle, tl_bh);
/* TODO: Perhaps we can calculate the bulk of the
* credits up front rather than extending like
@@ -4668,8 +5920,9 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
/* if start_blk is not set, we ignore the record as
* invalid. */
if (start_blk) {
- mlog(0, "free record %d, start = %u, clusters = %u\n",
- i, le32_to_cpu(rec.t_start), num_clusters);
+ trace_ocfs2_replay_truncate_records(
+ (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
+ i, le32_to_cpu(rec.t_start), num_clusters);
status = ocfs2_free_clusters(handle, data_alloc_inode,
data_alloc_bh, start_blk,
@@ -4682,8 +5935,9 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
i--;
}
+ osb->truncated_clusters = 0;
+
bail:
- mlog_exit(status);
return status;
}
@@ -4700,21 +5954,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- mlog_entry_void();
-
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto out;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
num_to_flush = le16_to_cpu(tl->tl_used);
- mlog(0, "Flush %u records from truncate log #%llu\n",
- num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
+ trace_ocfs2_flush_truncate_log(
+ (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
+ num_to_flush);
if (!num_to_flush) {
status = 0;
goto out;
@@ -4760,7 +6013,6 @@ out_mutex:
iput(data_alloc_inode);
out:
- mlog_exit(status);
return status;
}
@@ -4783,20 +6035,19 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
container_of(work, struct ocfs2_super,
osb_truncate_log_wq.work);
- mlog_entry_void();
-
status = ocfs2_flush_truncate_log(osb);
if (status < 0)
mlog_errno(status);
-
- mlog_exit(status);
+ else
+ ocfs2_init_steal_slots(osb);
}
#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
int cancel)
{
- if (osb->osb_tl_inode) {
+ if (osb->osb_tl_inode &&
+ atomic_read(&osb->osb_tl_disable) == 0) {
/* We want to push off log flushes while truncates are
* still running. */
if (cancel)
@@ -4825,8 +6076,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
- status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
- OCFS2_BH_CACHED, inode);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
iput(inode);
mlog_errno(status);
@@ -4836,7 +6086,6 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
*tl_inode = inode;
*tl_bh = bh;
bail:
- mlog_exit(status);
return status;
}
@@ -4856,7 +6105,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
*tl_copy = NULL;
- mlog(0, "recover truncate log from slot %d\n", slot_num);
+ trace_ocfs2_begin_truncate_log_recovery(slot_num);
status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
if (status < 0) {
@@ -4865,16 +6114,15 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
}
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's
+ * validated by the underlying call to ocfs2_read_inode_block(),
+ * so any corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
if (le16_to_cpu(tl->tl_used)) {
- mlog(0, "We'll have %u logs to recover\n",
- le16_to_cpu(tl->tl_used));
+ trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
if (!(*tl_copy)) {
@@ -4891,7 +6139,8 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
* tl_used. */
tl->tl_used = 0;
- status = ocfs2_write_block(osb, tl_bh, tl_inode);
+ ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
+ status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -4901,15 +6150,14 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
bail:
if (tl_inode)
iput(tl_inode);
- if (tl_bh)
- brelse(tl_bh);
+ brelse(tl_bh);
if (status < 0 && (*tl_copy)) {
kfree(*tl_copy);
*tl_copy = NULL;
+ mlog_errno(status);
}
- mlog_exit(status);
return status;
}
@@ -4924,8 +6172,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_truncate_log *tl;
- mlog_entry_void();
-
if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
return -EINVAL;
@@ -4933,8 +6179,9 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
tl = &tl_copy->id2.i_dealloc;
num_recs = le16_to_cpu(tl->tl_used);
- mlog(0, "cleanup %u records from %llu\n", num_recs,
- (unsigned long long)le64_to_cpu(tl_copy->i_blkno));
+ trace_ocfs2_complete_truncate_log_recovery(
+ (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
+ num_recs);
mutex_lock(&tl_inode->i_mutex);
for(i = 0; i < num_recs; i++) {
@@ -4969,7 +6216,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
bail_up:
mutex_unlock(&tl_inode->i_mutex);
- mlog_exit(status);
return status;
}
@@ -4978,7 +6224,7 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
int status;
struct inode *tl_inode = osb->osb_tl_inode;
- mlog_entry_void();
+ atomic_set(&osb->osb_tl_disable, 1);
if (tl_inode) {
cancel_delayed_work(&osb->osb_truncate_log_wq);
@@ -4991,8 +6237,6 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
brelse(osb->osb_tl_bh);
iput(osb->osb_tl_inode);
}
-
- mlog_exit_void();
}
int ocfs2_truncate_log_init(struct ocfs2_super *osb)
@@ -5001,8 +6245,6 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
struct inode *tl_inode = NULL;
struct buffer_head *tl_bh = NULL;
- mlog_entry_void();
-
status = ocfs2_get_truncate_log_info(osb,
osb->slot_num,
&tl_inode,
@@ -5015,10 +6257,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
* until we're sure all is well. */
INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
ocfs2_truncate_log_worker);
+ atomic_set(&osb->osb_tl_disable, 0);
osb->osb_tl_bh = tl_bh;
osb->osb_tl_inode = tl_inode;
- mlog_exit(status);
return status;
}
@@ -5041,10 +6283,14 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
*/
/*
- * Describes a single block free from a suballocator
+ * Describe a single bit freed from a suballocator. For the block
+ * suballocators, it represents one block. For the global cluster
+ * allocator, it represents some clusters and free_bit indicates
+ * clusters number.
*/
struct ocfs2_cached_block_free {
struct ocfs2_cached_block_free *free_next;
+ u64 free_bg;
u64 free_blk;
unsigned int free_bit;
};
@@ -5056,10 +6302,10 @@ struct ocfs2_per_slot_free_list {
struct ocfs2_cached_block_free *f_first;
};
-static int ocfs2_free_cached_items(struct ocfs2_super *osb,
- int sysfile_type,
- int slot,
- struct ocfs2_cached_block_free *head)
+static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
+ int sysfile_type,
+ int slot,
+ struct ocfs2_cached_block_free *head)
{
int ret;
u64 bg_blkno;
@@ -5091,10 +6337,13 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb,
}
while (head) {
- bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
- head->free_bit);
- mlog(0, "Free bit: (bit %u, blkno %llu)\n",
- head->free_bit, (unsigned long long)head->free_blk);
+ if (head->free_bg)
+ bg_blkno = head->free_bg;
+ else
+ bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
+ head->free_bit);
+ trace_ocfs2_free_cached_blocks(
+ (unsigned long long)head->free_blk, head->free_bit);
ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
head->free_bit, bg_blkno, 1);
@@ -5134,6 +6383,81 @@ out:
return ret;
}
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ u64 blkno, unsigned int bit)
+{
+ int ret = 0;
+ struct ocfs2_cached_block_free *item;
+
+ item = kzalloc(sizeof(*item), GFP_NOFS);
+ if (item == NULL) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ return ret;
+ }
+
+ trace_ocfs2_cache_cluster_dealloc((unsigned long long)blkno, bit);
+
+ item->free_blk = blkno;
+ item->free_bit = bit;
+ item->free_next = ctxt->c_global_allocator;
+
+ ctxt->c_global_allocator = item;
+ return ret;
+}
+
+static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
+ struct ocfs2_cached_block_free *head)
+{
+ struct ocfs2_cached_block_free *tmp;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ int ret = 0;
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ while (head) {
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
+ head->free_bit);
+
+ ocfs2_commit_trans(osb, handle);
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ mutex_unlock(&tl_inode->i_mutex);
+
+ while (head) {
+ /* Premature exit may have left some dangling items. */
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+ }
+
+ return ret;
+}
+
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt)
{
@@ -5147,10 +6471,12 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
fl = ctxt->c_first_suballocator;
if (fl->f_first) {
- mlog(0, "Free items: (type %u, slot %d)\n",
- fl->f_inode_type, fl->f_slot);
- ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
- fl->f_slot, fl->f_first);
+ trace_ocfs2_run_deallocs(fl->f_inode_type,
+ fl->f_slot);
+ ret2 = ocfs2_free_cached_blocks(osb,
+ fl->f_inode_type,
+ fl->f_slot,
+ fl->f_first);
if (ret2)
mlog_errno(ret2);
if (!ret)
@@ -5161,6 +6487,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
kfree(fl);
}
+ if (ctxt->c_global_allocator) {
+ ret2 = ocfs2_free_cached_clusters(osb,
+ ctxt->c_global_allocator);
+ if (ret2)
+ mlog_errno(ret2);
+ if (!ret)
+ ret = ret2;
+
+ ctxt->c_global_allocator = NULL;
+ }
+
return ret;
}
@@ -5190,9 +6527,9 @@ ocfs2_find_per_slot_free_list(int type,
return fl;
}
-static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
- int type, int slot, u64 blkno,
- unsigned int bit)
+int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ int type, int slot, u64 suballoc,
+ u64 blkno, unsigned int bit)
{
int ret;
struct ocfs2_per_slot_free_list *fl;
@@ -5205,16 +6542,18 @@ static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
goto out;
}
- item = kmalloc(sizeof(*item), GFP_NOFS);
+ item = kzalloc(sizeof(*item), GFP_NOFS);
if (item == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
- type, slot, bit, (unsigned long long)blkno);
+ trace_ocfs2_cache_block_dealloc(type, slot,
+ (unsigned long long)suballoc,
+ (unsigned long long)blkno, bit);
+ item->free_bg = suballoc;
item->free_blk = blkno;
item->free_bit = bit;
item->free_next = fl->f_first;
@@ -5231,437 +6570,21 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
{
return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
le16_to_cpu(eb->h_suballoc_slot),
+ le64_to_cpu(eb->h_suballoc_loc),
le64_to_cpu(eb->h_blkno),
le16_to_cpu(eb->h_suballoc_bit));
}
-/* This function will figure out whether the currently last extent
- * block will be deleted, and if it will, what the new last extent
- * block will be so we can update his h_next_leaf_blk field, as well
- * as the dinodes i_last_eb_blk */
-static int ocfs2_find_new_last_ext_blk(struct inode *inode,
- unsigned int clusters_to_del,
- struct ocfs2_path *path,
- struct buffer_head **new_last_eb)
-{
- int next_free, ret = 0;
- u32 cpos;
- struct ocfs2_extent_rec *rec;
- struct ocfs2_extent_block *eb;
- struct ocfs2_extent_list *el;
- struct buffer_head *bh = NULL;
-
- *new_last_eb = NULL;
-
- /* we have no tree, so of course, no last_eb. */
- if (!path->p_tree_depth)
- goto out;
-
- /* trunc to zero special case - this makes tree_depth = 0
- * regardless of what it is. */
- if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
- goto out;
-
- el = path_leaf_el(path);
- BUG_ON(!el->l_next_free_rec);
-
- /*
- * Make sure that this extent list will actually be empty
- * after we clear away the data. We can shortcut out if
- * there's more than one non-empty extent in the
- * list. Otherwise, a check of the remaining extent is
- * necessary.
- */
- next_free = le16_to_cpu(el->l_next_free_rec);
- rec = NULL;
- if (ocfs2_is_empty_extent(&el->l_recs[0])) {
- if (next_free > 2)
- goto out;
-
- /* We may have a valid extent in index 1, check it. */
- if (next_free == 2)
- rec = &el->l_recs[1];
-
- /*
- * Fall through - no more nonempty extents, so we want
- * to delete this leaf.
- */
- } else {
- if (next_free > 1)
- goto out;
-
- rec = &el->l_recs[0];
- }
-
- if (rec) {
- /*
- * Check it we'll only be trimming off the end of this
- * cluster.
- */
- if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
- goto out;
- }
-
- ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- eb = (struct ocfs2_extent_block *) bh->b_data;
- el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
-
- *new_last_eb = bh;
- get_bh(*new_last_eb);
- mlog(0, "returning block %llu, (cpos: %u)\n",
- (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
-out:
- brelse(bh);
-
- return ret;
-}
-
-/*
- * Trim some clusters off the rightmost edge of a tree. Only called
- * during truncate.
- *
- * The caller needs to:
- * - start journaling of each path component.
- * - compute and fully set up any new last ext block
- */
-static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
- handle_t *handle, struct ocfs2_truncate_context *tc,
- u32 clusters_to_del, u64 *delete_start)
-{
- int ret, i, index = path->p_tree_depth;
- u32 new_edge = 0;
- u64 deleted_eb = 0;
- struct buffer_head *bh;
- struct ocfs2_extent_list *el;
- struct ocfs2_extent_rec *rec;
-
- *delete_start = 0;
-
- while (index >= 0) {
- bh = path->p_node[index].bh;
- el = path->p_node[index].el;
-
- mlog(0, "traveling tree (index = %d, block = %llu)\n",
- index, (unsigned long long)bh->b_blocknr);
-
- BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
-
- if (index !=
- (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
- ocfs2_error(inode->i_sb,
- "Inode %lu has invalid ext. block %llu",
- inode->i_ino,
- (unsigned long long)bh->b_blocknr);
- ret = -EROFS;
- goto out;
- }
-
-find_tail_record:
- i = le16_to_cpu(el->l_next_free_rec) - 1;
- rec = &el->l_recs[i];
-
- mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
- "next = %u\n", i, le32_to_cpu(rec->e_cpos),
- ocfs2_rec_clusters(el, rec),
- (unsigned long long)le64_to_cpu(rec->e_blkno),
- le16_to_cpu(el->l_next_free_rec));
-
- BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
-
- if (le16_to_cpu(el->l_tree_depth) == 0) {
- /*
- * If the leaf block contains a single empty
- * extent and no records, we can just remove
- * the block.
- */
- if (i == 0 && ocfs2_is_empty_extent(rec)) {
- memset(rec, 0,
- sizeof(struct ocfs2_extent_rec));
- el->l_next_free_rec = cpu_to_le16(0);
-
- goto delete;
- }
-
- /*
- * Remove any empty extents by shifting things
- * left. That should make life much easier on
- * the code below. This condition is rare
- * enough that we shouldn't see a performance
- * hit.
- */
- if (ocfs2_is_empty_extent(&el->l_recs[0])) {
- le16_add_cpu(&el->l_next_free_rec, -1);
-
- for(i = 0;
- i < le16_to_cpu(el->l_next_free_rec); i++)
- el->l_recs[i] = el->l_recs[i + 1];
-
- memset(&el->l_recs[i], 0,
- sizeof(struct ocfs2_extent_rec));
-
- /*
- * We've modified our extent list. The
- * simplest way to handle this change
- * is to being the search from the
- * start again.
- */
- goto find_tail_record;
- }
-
- le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
-
- /*
- * We'll use "new_edge" on our way back up the
- * tree to know what our rightmost cpos is.
- */
- new_edge = le16_to_cpu(rec->e_leaf_clusters);
- new_edge += le32_to_cpu(rec->e_cpos);
-
- /*
- * The caller will use this to delete data blocks.
- */
- *delete_start = le64_to_cpu(rec->e_blkno)
- + ocfs2_clusters_to_blocks(inode->i_sb,
- le16_to_cpu(rec->e_leaf_clusters));
-
- /*
- * If it's now empty, remove this record.
- */
- if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
- memset(rec, 0,
- sizeof(struct ocfs2_extent_rec));
- le16_add_cpu(&el->l_next_free_rec, -1);
- }
- } else {
- if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
- memset(rec, 0,
- sizeof(struct ocfs2_extent_rec));
- le16_add_cpu(&el->l_next_free_rec, -1);
-
- goto delete;
- }
-
- /* Can this actually happen? */
- if (le16_to_cpu(el->l_next_free_rec) == 0)
- goto delete;
-
- /*
- * We never actually deleted any clusters
- * because our leaf was empty. There's no
- * reason to adjust the rightmost edge then.
- */
- if (new_edge == 0)
- goto delete;
-
- rec->e_int_clusters = cpu_to_le32(new_edge);
- le32_add_cpu(&rec->e_int_clusters,
- -le32_to_cpu(rec->e_cpos));
-
- /*
- * A deleted child record should have been
- * caught above.
- */
- BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
- }
-
-delete:
- ret = ocfs2_journal_dirty(handle, bh);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- mlog(0, "extent list container %llu, after: record %d: "
- "(%u, %u, %llu), next = %u.\n",
- (unsigned long long)bh->b_blocknr, i,
- le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
- (unsigned long long)le64_to_cpu(rec->e_blkno),
- le16_to_cpu(el->l_next_free_rec));
-
- /*
- * We must be careful to only attempt delete of an
- * extent block (and not the root inode block).
- */
- if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
- struct ocfs2_extent_block *eb =
- (struct ocfs2_extent_block *)bh->b_data;
-
- /*
- * Save this for use when processing the
- * parent block.
- */
- deleted_eb = le64_to_cpu(eb->h_blkno);
-
- mlog(0, "deleting this extent block.\n");
-
- ocfs2_remove_from_cache(inode, bh);
-
- BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
- BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
- BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
-
- ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
- /* An error here is not fatal. */
- if (ret < 0)
- mlog_errno(ret);
- } else {
- deleted_eb = 0;
- }
-
- index--;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static int ocfs2_do_truncate(struct ocfs2_super *osb,
- unsigned int clusters_to_del,
- struct inode *inode,
- struct buffer_head *fe_bh,
- handle_t *handle,
- struct ocfs2_truncate_context *tc,
- struct ocfs2_path *path)
-{
- int status;
- struct ocfs2_dinode *fe;
- struct ocfs2_extent_block *last_eb = NULL;
- struct ocfs2_extent_list *el;
- struct buffer_head *last_eb_bh = NULL;
- u64 delete_blk = 0;
-
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
- status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
- path, &last_eb_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- /*
- * Each component will be touched, so we might as well journal
- * here to avoid having to handle errors later.
- */
- status = ocfs2_journal_access_path(inode, handle, path);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- if (last_eb_bh) {
- status = ocfs2_journal_access(handle, inode, last_eb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- }
-
- el = &(fe->id2.i_list);
-
- /*
- * Lower levels depend on this never happening, but it's best
- * to check it up here before changing the tree.
- */
- if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
- ocfs2_error(inode->i_sb,
- "Inode %lu has an empty extent record, depth %u\n",
- inode->i_ino, le16_to_cpu(el->l_tree_depth));
- status = -EROFS;
- goto bail;
- }
-
- spin_lock(&OCFS2_I(inode)->ip_lock);
- OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
- clusters_to_del;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- le32_add_cpu(&fe->i_clusters, -clusters_to_del);
- inode->i_blocks = ocfs2_inode_sector_count(inode);
-
- status = ocfs2_trim_tree(inode, path, handle, tc,
- clusters_to_del, &delete_blk);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
-
- if (le32_to_cpu(fe->i_clusters) == 0) {
- /* trunc to zero is a special case. */
- el->l_tree_depth = 0;
- fe->i_last_eb_blk = 0;
- } else if (last_eb)
- fe->i_last_eb_blk = last_eb->h_blkno;
-
- status = ocfs2_journal_dirty(handle, fe_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- if (last_eb) {
- /* If there will be a new last extent block, then by
- * definition, there cannot be any leaves to the right of
- * him. */
- last_eb->h_next_leaf_blk = 0;
- status = ocfs2_journal_dirty(handle, last_eb_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
-
- if (delete_blk) {
- status = ocfs2_truncate_log_append(osb, handle, delete_blk,
- clusters_to_del);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
- status = 0;
-bail:
-
- mlog_exit(status);
- return status;
-}
-
-static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh)
+static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
{
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
return 0;
}
-static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
-{
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- return ocfs2_journal_dirty_data(handle, bh);
-}
-
-static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
- unsigned int from, unsigned int to,
- struct page *page, int zero, u64 *phys)
+void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
+ unsigned int from, unsigned int to,
+ struct page *page, int zero, u64 *phys)
{
int ret, partial = 0;
@@ -5677,17 +6600,13 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
* here if they aren't - ocfs2_map_page_blocks()
* might've skipped some
*/
- if (ocfs2_should_order_data(inode)) {
- ret = walk_page_buffers(handle,
- page_buffers(page),
- from, to, &partial,
- ocfs2_ordered_zero_func);
- if (ret < 0)
- mlog_errno(ret);
- } else {
- ret = walk_page_buffers(handle, page_buffers(page),
- from, to, &partial,
- ocfs2_writeback_zero_func);
+ ret = walk_page_buffers(handle, page_buffers(page),
+ from, to, &partial,
+ ocfs2_zero_func);
+ if (ret < 0)
+ mlog_errno(ret);
+ else if (ocfs2_should_order_data(inode)) {
+ ret = ocfs2_jbd2_file_inode(handle, inode);
if (ret < 0)
mlog_errno(ret);
}
@@ -5733,25 +6652,21 @@ out:
ocfs2_unlock_and_free_pages(pages, numpages);
}
-static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num)
+int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
+ struct page **pages, int *num)
{
int numpages, ret = 0;
- struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
unsigned long index;
loff_t last_page_bytes;
BUG_ON(start > end);
- BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
- (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
-
numpages = 0;
last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_CACHE_SHIFT;
do {
- pages[numpages] = grab_cache_page(mapping, index);
+ pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
if (!pages[numpages]) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -5774,6 +6689,17 @@ out:
return ret;
}
+static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
+ struct page **pages, int *num)
+{
+ struct super_block *sb = inode->i_sb;
+
+ BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
+ (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
+
+ return ocfs2_grab_pages(inode, start, end, pages, num);
+}
+
/*
* Zero the area past i_size but still within an allocated
* cluster. This avoids exposing nonzero data on subsequent file
@@ -5840,32 +6766,40 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
* wait on them - the truncate_inode_pages() call later will
* do that for us.
*/
- ret = do_sync_mapping_range(inode->i_mapping, range_start,
- range_end - 1, SYNC_FILE_RANGE_WRITE);
+ ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
+ range_end - 1);
if (ret)
mlog_errno(ret);
out:
- if (pages)
- kfree(pages);
+ kfree(pages);
return ret;
}
-static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
+static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
+ struct ocfs2_dinode *di)
{
unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
+ unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
- memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
+ if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
+ memset(&di->id2, 0, blocksize -
+ offsetof(struct ocfs2_dinode, id2) -
+ xattrsize);
+ else
+ memset(&di->id2, 0, blocksize -
+ offsetof(struct ocfs2_dinode, id2));
}
void ocfs2_dinode_new_extent_list(struct inode *inode,
struct ocfs2_dinode *di)
{
- ocfs2_zero_dinode_id2(inode, di);
+ ocfs2_zero_dinode_id2_with_xattr(inode, di);
di->id2.i_list.l_tree_depth = 0;
di->id2.i_list.l_next_free_rec = 0;
- di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
+ di->id2.i_list.l_count = cpu_to_le16(
+ ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
}
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
@@ -5882,15 +6816,18 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
* We clear the entire i_data structure here so that all
* fields can be properly initialized.
*/
- ocfs2_zero_dinode_id2(inode, di);
+ ocfs2_zero_dinode_id2_with_xattr(inode, di);
- idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
+ idata->id_count = cpu_to_le16(
+ ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
}
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
int ret, i, has_data, num_pages = 0;
+ int need_free = 0;
+ u32 bit_off, num;
handle_t *handle;
u64 uninitialized_var(block);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -5899,6 +6836,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
+ struct ocfs2_extent_tree et;
+ int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
@@ -5918,26 +6857,34 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
+ handle = ocfs2_start_trans(osb,
+ ocfs2_inline_to_extents_credits(osb->sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out_unlock;
}
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
if (has_data) {
- u32 bit_off, num;
unsigned int page_end;
u64 phys;
- ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
+ ret = dquot_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (ret)
+ goto out_commit;
+ did_quota = 1;
+
+ data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
+
+ ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
&num);
if (ret) {
mlog_errno(ret);
@@ -5961,6 +6908,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -5971,6 +6919,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -5988,6 +6937,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
spin_unlock(&oi->ip_lock);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_dinode_new_extent_list(inode, di);
ocfs2_journal_dirty(handle, di_bh);
@@ -5998,10 +6948,11 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* this proves to be false, we could always re-build
* the in-inode data from our pages.
*/
- ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
- 0, block, 1, 0, NULL);
+ ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+ ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6009,6 +6960,22 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_commit:
+ if (ret < 0 && did_quota)
+ dquot_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num);
+ }
+
ocfs2_commit_trans(osb, handle);
out_unlock:
@@ -6032,22 +6999,28 @@ out:
*/
int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context *tc)
+ struct buffer_head *di_bh)
{
- int status, i, credits, tl_sem = 0;
- u32 clusters_to_del, new_highest_cpos, range;
+ int status = 0, i, flags = 0;
+ u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
+ u64 blkno = 0;
struct ocfs2_extent_list *el;
- handle_t *handle = NULL;
- struct inode *tl_inode = osb->osb_tl_inode;
+ struct ocfs2_extent_rec *rec;
struct ocfs2_path *path = NULL;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ struct ocfs2_extent_list *root_el = &(di->id2.i_list);
+ u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
+ struct ocfs2_extent_tree et;
+ struct ocfs2_cached_dealloc_ctxt dealloc;
- mlog_entry_void();
+ ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+ ocfs2_init_dealloc_ctxt(&dealloc);
new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
i_size_read(inode));
- path = ocfs2_new_inode_path(fe_bh);
+ path = ocfs2_new_path(di_bh, &di->id2.i_list,
+ ocfs2_journal_access_di);
if (!path) {
status = -ENOMEM;
mlog_errno(status);
@@ -6068,14 +7041,17 @@ start:
/*
* Truncate always works against the rightmost tree branch.
*/
- status = ocfs2_find_path(inode, path, UINT_MAX);
+ status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
if (status) {
mlog_errno(status);
goto bail;
}
- mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n",
- OCFS2_I(inode)->ip_clusters, path->p_tree_depth);
+ trace_ocfs2_commit_truncate(
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ new_highest_cpos,
+ OCFS2_I(inode)->ip_clusters,
+ path->p_tree_depth);
/*
* By now, el will point to the extent list on the bottom most
@@ -6099,61 +7075,60 @@ start:
}
i = le16_to_cpu(el->l_next_free_rec) - 1;
- range = le32_to_cpu(el->l_recs[i].e_cpos) +
- ocfs2_rec_clusters(el, &el->l_recs[i]);
- if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) {
- clusters_to_del = 0;
- } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
- clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
+ rec = &el->l_recs[i];
+ flags = rec->e_flags;
+ range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
+
+ if (i == 0 && ocfs2_is_empty_extent(rec)) {
+ /*
+ * Lower levels depend on this never happening, but it's best
+ * to check it up here before changing the tree.
+ */
+ if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
+ ocfs2_error(inode->i_sb, "Inode %lu has an empty "
+ "extent record, depth %u\n", inode->i_ino,
+ le16_to_cpu(root_el->l_tree_depth));
+ status = -EROFS;
+ goto bail;
+ }
+ trunc_cpos = le32_to_cpu(rec->e_cpos);
+ trunc_len = 0;
+ blkno = 0;
+ } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
+ /*
+ * Truncate entire record.
+ */
+ trunc_cpos = le32_to_cpu(rec->e_cpos);
+ trunc_len = ocfs2_rec_clusters(el, rec);
+ blkno = le64_to_cpu(rec->e_blkno);
} else if (range > new_highest_cpos) {
- clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
- le32_to_cpu(el->l_recs[i].e_cpos)) -
- new_highest_cpos;
+ /*
+ * Partial truncate. it also should be
+ * the last truncate we're doing.
+ */
+ trunc_cpos = new_highest_cpos;
+ trunc_len = range - new_highest_cpos;
+ coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
+ blkno = le64_to_cpu(rec->e_blkno) +
+ ocfs2_clusters_to_blocks(inode->i_sb, coff);
} else {
+ /*
+ * Truncate completed, leave happily.
+ */
status = 0;
goto bail;
}
- mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
- clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
+ phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
- mutex_lock(&tl_inode->i_mutex);
- tl_sem = 1;
- /* ocfs2_truncate_log_needs_flush guarantees us at least one
- * record is free for use. If there isn't any, we flush to get
- * an empty truncate log. */
- if (ocfs2_truncate_log_needs_flush(osb)) {
- status = __ocfs2_flush_truncate_log(osb);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
-
- credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
- (struct ocfs2_dinode *)fe_bh->b_data,
- el);
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(status);
- goto bail;
- }
-
- status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
- tc, path);
+ status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
+ phys_cpos, trunc_len, flags, &dealloc,
+ refcount_loc);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- mutex_unlock(&tl_inode->i_mutex);
- tl_sem = 0;
-
- ocfs2_commit_trans(osb, handle);
- handle = NULL;
-
ocfs2_reinit_path(path, 1);
/*
@@ -6166,84 +7141,10 @@ bail:
ocfs2_schedule_truncate_log_flush(osb, 1);
- if (tl_sem)
- mutex_unlock(&tl_inode->i_mutex);
-
- if (handle)
- ocfs2_commit_trans(osb, handle);
-
- ocfs2_run_deallocs(osb, &tc->tc_dealloc);
+ ocfs2_run_deallocs(osb, &dealloc);
ocfs2_free_path(path);
- /* This will drop the ext_alloc cluster lock for us */
- ocfs2_free_truncate_context(tc);
-
- mlog_exit(status);
- return status;
-}
-
-/*
- * Expects the inode to already be locked.
- */
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context **tc)
-{
- int status;
- unsigned int new_i_clusters;
- struct ocfs2_dinode *fe;
- struct ocfs2_extent_block *eb;
- struct buffer_head *last_eb_bh = NULL;
-
- mlog_entry_void();
-
- *tc = NULL;
-
- new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
- i_size_read(inode));
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
- mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
- "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
- (unsigned long long)le64_to_cpu(fe->i_size));
-
- *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
- if (!(*tc)) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
- ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
-
- if (fe->id2.i_list.l_tree_depth) {
- status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
- &last_eb_bh, OCFS2_BH_CACHED, inode);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-
- brelse(last_eb_bh);
- status = -EIO;
- goto bail;
- }
- }
-
- (*tc)->tc_last_eb_bh = last_eb_bh;
-
- status = 0;
-bail:
- if (status < 0) {
- if (*tc)
- ocfs2_free_truncate_context(*tc);
- *tc = NULL;
- }
- mlog_exit_void();
return status;
}
@@ -6263,7 +7164,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
if (end > i_size_read(inode))
end = i_size_read(inode);
- BUG_ON(start >= end);
+ BUG_ON(start > end);
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
@@ -6286,8 +7187,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
goto out;
}
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
@@ -6313,6 +7214,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, di_bh);
out_commit:
@@ -6322,18 +7224,163 @@ out:
return ret;
}
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
+static int ocfs2_trim_extent(struct super_block *sb,
+ struct ocfs2_group_desc *gd,
+ u32 start, u32 count)
{
- /*
- * The caller is responsible for completing deallocation
- * before freeing the context.
- */
- if (tc->tc_dealloc.c_first_suballocator != NULL)
- mlog(ML_NOTICE,
- "Truncate completion has non-empty dealloc context\n");
+ u64 discard, bcount;
+
+ bcount = ocfs2_clusters_to_blocks(sb, count);
+ discard = le64_to_cpu(gd->bg_blkno) +
+ ocfs2_clusters_to_blocks(sb, start);
+
+ trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
+
+ return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
+}
+
+static int ocfs2_trim_group(struct super_block *sb,
+ struct ocfs2_group_desc *gd,
+ u32 start, u32 max, u32 minbits)
+{
+ int ret = 0, count = 0, next;
+ void *bitmap = gd->bg_bitmap;
+
+ if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
+ return 0;
+
+ trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
+ start, max, minbits);
+
+ while (start < max) {
+ start = ocfs2_find_next_zero_bit(bitmap, max, start);
+ if (start >= max)
+ break;
+ next = ocfs2_find_next_bit(bitmap, max, start);
+
+ if ((next - start) >= minbits) {
+ ret = ocfs2_trim_extent(sb, gd,
+ start, next - start);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ count += next - start;
+ }
+ start = next + 1;
+
+ if (fatal_signal_pending(current)) {
+ count = -ERESTARTSYS;
+ break;
+ }
+
+ if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
+ break;
+ }
+
+ if (ret < 0)
+ count = ret;
- if (tc->tc_last_eb_bh)
- brelse(tc->tc_last_eb_bh);
+ return count;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ u64 start, len, trimmed, first_group, last_group, group;
+ int ret, cnt;
+ u32 first_bit, last_bit, minlen;
+ struct buffer_head *main_bm_bh = NULL;
+ struct inode *main_bm_inode = NULL;
+ struct buffer_head *gd_bh = NULL;
+ struct ocfs2_dinode *main_bm;
+ struct ocfs2_group_desc *gd = NULL;
+
+ start = range->start >> osb->s_clustersize_bits;
+ len = range->len >> osb->s_clustersize_bits;
+ minlen = range->minlen >> osb->s_clustersize_bits;
+
+ if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
+ return -EINVAL;
- kfree(tc);
+ main_bm_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_BITMAP_SYSTEM_INODE,
+ OCFS2_INVALID_SLOT);
+ if (!main_bm_inode) {
+ ret = -EIO;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ mutex_lock(&main_bm_inode->i_mutex);
+
+ ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out_mutex;
+ }
+ main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
+
+ if (start >= le32_to_cpu(main_bm->i_clusters)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ len = range->len >> osb->s_clustersize_bits;
+ if (start + len > le32_to_cpu(main_bm->i_clusters))
+ len = le32_to_cpu(main_bm->i_clusters) - start;
+
+ trace_ocfs2_trim_fs(start, len, minlen);
+
+ /* Determine first and last group to examine based on start and len */
+ first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+ if (first_group == osb->first_cluster_group_blkno)
+ first_bit = start;
+ else
+ first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
+ last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
+ last_bit = osb->bitmap_cpg;
+
+ trimmed = 0;
+ for (group = first_group; group <= last_group;) {
+ if (first_bit + len >= osb->bitmap_cpg)
+ last_bit = osb->bitmap_cpg;
+ else
+ last_bit = first_bit + len;
+
+ ret = ocfs2_read_group_descriptor(main_bm_inode,
+ main_bm, group,
+ &gd_bh);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+
+ gd = (struct ocfs2_group_desc *)gd_bh->b_data;
+ cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
+ brelse(gd_bh);
+ gd_bh = NULL;
+ if (cnt < 0) {
+ ret = cnt;
+ mlog_errno(ret);
+ break;
+ }
+
+ trimmed += cnt;
+ len -= osb->bitmap_cpg - first_bit;
+ first_bit = 0;
+ if (group == osb->first_cluster_group_blkno)
+ group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+ else
+ group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+ }
+ range->len = trimmed * sb->s_blocksize;
+out_unlock:
+ ocfs2_inode_unlock(main_bm_inode, 0);
+ brelse(main_bm_bh);
+out_mutex:
+ mutex_unlock(&main_bm_inode->i_mutex);
+ iput(main_bm_inode);
+out:
+ return ret;
}