diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 1410 |
1 files changed, 864 insertions, 546 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4890d6f3ad1..5199bac7fc6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -46,6 +46,13 @@ #include <trace/events/ext4.h> +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags); + static int ext4_ext_truncate_extend_restart(handle_t *handle, struct inode *inode, int needed) @@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex, int *err) + struct ext4_extent *ex, int *err, unsigned int flags) { ext4_fsblk_t goal, newblock; goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); - newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); + newblock = ext4_new_meta_blocks(handle, inode, goal, flags, + NULL, err); return newblock; } @@ -474,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) } ext_debug("\n"); } + +static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, + ext4_fsblk_t newblock, int level) +{ + int depth = ext_depth(inode); + struct ext4_extent *ex; + + if (depth != level) { + struct ext4_extent_idx *idx; + idx = path[level].p_idx; + while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { + ext_debug("%d: move %d:%llu in new index %llu\n", level, + le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), + newblock); + idx++; + } + + return; + } + + ex = path[depth].p_ext; + while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + le32_to_cpu(ex->ee_block), + ext4_ext_pblock(ex), + ext4_ext_is_uninitialized(ex), + ext4_ext_get_actual_len(ex), + newblock); + ex++; + } +} + #else #define ext4_ext_show_path(inode, path) #define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_move(inode, path, newblock, level) #endif void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -792,14 +834,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, * - initializes subtree */ static int ext4_ext_split(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext, int at) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) { struct buffer_head *bh = NULL; int depth = ext_depth(inode); struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; - struct ext4_extent *ex; int i = at, k, m, a; ext4_fsblk_t newblock, oldblock; __le32 border; @@ -847,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, - newext, &err); + newext, &err, flags); if (newblock == 0) goto cleanup; ablocks[a] = newblock; @@ -876,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; - ex = EXT_FIRST_EXTENT(neh); /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != @@ -888,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, goto cleanup; } /* start copy from next extent */ - /* TODO: we could do it by single memmove */ - m = 0; - path[depth].p_ext++; - while (path[depth].p_ext <= - EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext4_ext_pblock(path[depth].p_ext), - ext4_ext_is_uninitialized(path[depth].p_ext), - ext4_ext_get_actual_len(path[depth].p_ext), - newblock); - /*memmove(ex++, path[depth].p_ext++, - sizeof(struct ext4_extent)); - neh->eh_entries++;*/ - path[depth].p_ext++; - m++; - } + m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; + ext4_ext_show_move(inode, path, newblock, depth); if (m) { - memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); + struct ext4_extent *ex; + ex = EXT_FIRST_EXTENT(neh); + memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); le16_add_cpu(&neh->eh_entries, m); } @@ -968,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); - /* copy indexes */ - m = 0; - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, - EXT_MAX_INDEX(path[i].p_hdr)); + /* move remainder of path[i] to the new index block */ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr))) { EXT4_ERROR_INODE(inode, @@ -982,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - ext4_idx_pblock(path[i].p_idx), - newblock); - /*memmove(++fidx, path[i].p_idx++, - sizeof(struct ext4_extent_idx)); - neh->eh_entries++; - BUG_ON(neh->eh_entries > neh->eh_max);*/ - path[i].p_idx++; - m++; - } + /* start copy indexes */ + m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + ext4_ext_show_move(inode, path, newblock, i); if (m) { - memmove(++fidx, path[i].p_idx - m, + memmove(++fidx, path[i].p_idx, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } @@ -1056,8 +1073,9 @@ cleanup: * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp = path; struct ext4_extent_header *neh; @@ -1065,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock; int err = 0; - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); + newblock = ext4_ext_new_meta_block(handle, inode, path, + newext, &err, flags); if (newblock == 0) return err; @@ -1140,8 +1159,9 @@ out: * if no free index is found, then it requests in-depth growing. */ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp; int depth, i, err = 0; @@ -1161,7 +1181,7 @@ repeat: if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(handle, inode, path, newext, i); + err = ext4_ext_split(handle, inode, flags, path, newext, i); if (err) goto out; @@ -1174,7 +1194,8 @@ repeat: err = PTR_ERR(path); } else { /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, path, newext); + err = ext4_ext_grow_indepth(handle, inode, flags, + path, newext); if (err) goto out; @@ -1563,7 +1584,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns * 1 if they got merged. */ -static int ext4_ext_try_to_merge(struct inode *inode, +static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { @@ -1603,6 +1624,31 @@ static int ext4_ext_try_to_merge(struct inode *inode, } /* + * This function tries to merge the @ex extent to neighbours in the tree. + * return 1 if merge left else 0. + */ +static int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) { + struct ext4_extent_header *eh; + unsigned int depth; + int merge_done = 0; + int ret = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + if (ex > EXT_FIRST_EXTENT(eh)) + merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); + + if (!merge_done) + ret = ext4_ext_try_to_merge_right(inode, path, ex); + + return ret; +} + +/* * check if a portion of the "newext" extent overlaps with an * existing extent. * @@ -1668,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, int depth, len, err; ext4_lblk_t next; unsigned uninitialized = 0; + int flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1742,7 +1789,9 @@ repeat: * There is no free space in the found leaf. * We're gonna add a new leaf in the tree. */ - err = ext4_ext_create_new_leaf(handle, inode, path, newext); + if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) + flags = EXT4_MB_USE_ROOT_BLOCKS; + err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); if (err) goto cleanup; depth = ext_depth(inode); @@ -2003,13 +2052,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * cache extent pointer. If the cached extent is a hole, + * this routine should be used instead of + * ext4_ext_in_cache if the calling function needs to + * know the size of the hole. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * * Return 0 if cache is invalid; 1 if the cache is valid */ -static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, - struct ext4_extent *ex) -{ +static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_ext_cache *ex){ struct ext4_ext_cache *cex; + struct ext4_sb_info *sbi; int ret = 0; /* @@ -2017,26 +2078,60 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; + sbi = EXT4_SB(inode->i_sb); /* has cache valid data? */ if (cex->ec_len == 0) goto errout; if (in_range(block, cex->ec_block, cex->ec_len)) { - ex->ee_block = cpu_to_le32(cex->ec_block); - ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); + memcpy(ex, cex, sizeof(struct ext4_ext_cache)); ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); ret = 1; } errout: + if (!ret) + sbi->extent_cache_misses++; + else + sbi->extent_cache_hits++; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return ret; } /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * extent pointer. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * + * Return 0 if cache is invalid; 1 if the cache is valid + */ +static int +ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_extent *ex) +{ + struct ext4_ext_cache cex; + int ret = 0; + + if (ext4_ext_check_cache(inode, block, &cex)) { + ex->ee_block = cpu_to_le32(cex.ec_block); + ext4_ext_store_pblock(ex, cex.ec_start); + ex->ee_len = cpu_to_le16(cex.ec_len); + ret = 1; + } + + return ret; +} + + +/* * ext4_ext_rm_idx: * removes index from the index block. * It's used in truncate case only, thus all requests are for @@ -2163,8 +2258,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); + /* head removal */ + ext4_lblk_t num; + ext4_fsblk_t start; + + num = to - from; + start = ext4_ext_pblock(ex); + + ext_debug("free first %u blocks starting %llu\n", num, start); + ext4_free_blocks(handle, inode, 0, start, num, flags); + } else { printk(KERN_INFO "strange request: removal(2) " "%u-%u from %u:%u\n", @@ -2173,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, return 0; } + +/* + * ext4_ext_rm_leaf() Removes the extents associated with the + * blocks appearing between "start" and "end", and splits the extents + * if "start" and "end" appear in the same extent + * + * @handle: The journal handle + * @inode: The files inode + * @path: The path to the leaf + * @start: The first block to remove + * @end: The last block to remove + */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t start) + struct ext4_ext_path *path, ext4_lblk_t start, + ext4_lblk_t end) { int err = 0, correct_index = 0; int depth = ext_depth(inode), credits; @@ -2186,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; + struct ext4_map_blocks map; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %u in leaf\n", start); @@ -2215,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; - b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? - ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; ext_debug(" border %u:%u\n", a, b); - if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { - block = 0; - num = 0; - BUG(); + /* If this extent is beyond the end of the hole, skip it */ + if (end <= ex_ee_block) { + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); + continue; + } else if (a != ex_ee_block && + b != ex_ee_block + ex_ee_len - 1) { + /* + * If this is a truncate, then this condition should + * never happen because at least one of the end points + * needs to be on the edge of the extent. + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + block = 0; + num = 0; + err = -EIO; + goto out; + } + /* + * else this is a hole punch, so the extent needs to + * be split since neither edge of the hole is on the + * extent edge + */ + else{ + map.m_pblk = ext4_ext_pblock(ex); + map.m_lblk = ex_ee_block; + map.m_len = b - ex_ee_block; + + err = ext4_split_extent(handle, + inode, path, &map, 0, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT | + EXT4_GET_BLOCKS_PRE_IO); + + if (err < 0) + goto out; + + ex_ee_len = ext4_ext_get_actual_len(ex); + + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; + + /* Then remove tail of this extent */ + block = ex_ee_block; + num = a - block; + } } else if (a != ex_ee_block) { /* remove tail of the extent */ block = ex_ee_block; num = a - block; } else if (b != ex_ee_block + ex_ee_len - 1) { /* remove head of the extent */ - block = a; - num = b - a; - /* there is no "make a hole" API yet */ - BUG(); + block = b; + num = ex_ee_block + ex_ee_len - b; + + /* + * If this is a truncate, this condition + * should never happen + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } else { /* remove whole extent: excellent! */ block = ex_ee_block; num = 0; - BUG_ON(a != ex_ee_block); - BUG_ON(b != ex_ee_block + ex_ee_len - 1); + if (a != ex_ee_block) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } + + if (b != ex_ee_block + ex_ee_len - 1) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } /* @@ -2270,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); - le16_add_cpu(&eh->eh_entries, -1); + } else if (block != ex_ee_block) { + /* + * If this was a head removal, then we need to update + * the physical block since it is now at a different + * location + */ + ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); } ex->ee_block = cpu_to_le32(block); @@ -2286,6 +2473,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; + /* + * If the extent was completely released, + * we need to remove it from the leaf + */ + if (num == 0) { + if (end != EXT_MAX_BLOCK) { + /* + * For hole punching, we need to scoot all the + * extents up when an extent is removed so that + * we dont have blank extents in the middle + */ + memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * + sizeof(struct ext4_extent)); + + /* Now get rid of the one at the end */ + memset(EXT_LAST_EXTENT(eh), 0, + sizeof(struct ext4_extent)); + } + le16_add_cpu(&eh->eh_entries, -1); + } + ext_debug("new extent: %u:%u:%llu\n", block, num, ext4_ext_pblock(ex)); ex--; @@ -2326,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2365,7 +2574,8 @@ again: while (i >= 0 && err == 0) { if (i == depth) { /* this is leaf block */ - err = ext4_ext_rm_leaf(handle, inode, path, start); + err = ext4_ext_rm_leaf(handle, inode, path, + start, end); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -2529,6 +2739,195 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) return ret; } +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ + due to ENOSPC */ +#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ +#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ + +/* + * ext4_split_extent_at() splits an extent at given block. + * + * @handle: the journal handle + * @inode: the file inode + * @path: the path to the extent + * @split: the logical block where the extent is splitted. + * @split_flags: indicates if the extent could be zeroout if split fails, and + * the states(init or uninit) of new extents. + * @flags: flags used to insert new extent to extent tree. + * + * + * Splits extent [a, b] into two extents [a, @split) and [@split, b], states + * of which are deterimined by split_flag. + * + * There are two cases: + * a> the extent are splitted into two extent. + * b> split is not needed, and just mark the extent. + * + * return 0 on success. + */ +static int ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, + int flags) +{ + ext4_fsblk_t newblock; + ext4_lblk_t ee_block; + struct ext4_extent *ex, newex, orig_ex; + struct ext4_extent *ex2 = NULL; + unsigned int ee_len, depth; + int err = 0; + + ext_debug("ext4_split_extents_at: inode %lu, logical" + "block %llu\n", inode->i_ino, (unsigned long long)split); + + ext4_ext_show_leaf(inode, path); + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + newblock = split - ee_block + ext4_ext_pblock(ex); + + BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + if (split == ee_block) { + /* + * case b: block @split is the block that the extent begins with + * then we just change the state of the extent, and splitting + * is not needed. + */ + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex); + else + ext4_ext_mark_initialized(ex); + + if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) + ext4_ext_try_to_merge(inode, path, ex); + + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } + + /* case a */ + memcpy(&orig_ex, ex, sizeof(orig_ex)); + ex->ee_len = cpu_to_le16(split - ee_block); + if (split_flag & EXT4_EXT_MARK_UNINIT1) + ext4_ext_mark_uninitialized(ex); + + /* + * path may lead to new leaf, not to original leaf any more + * after ext4_ext_insert_extent() returns, + */ + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto fix_extent_len; + + ex2 = &newex; + ex2->ee_block = cpu_to_le32(split); + ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); + ext4_ext_store_pblock(ex2, newblock); + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex2); + + err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_len = cpu_to_le32(ee_len); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } else if (err) + goto fix_extent_len; + +out: + ext4_ext_show_leaf(inode, path); + return err; + +fix_extent_len: + ex->ee_len = orig_ex.ee_len; + ext4_ext_dirty(handle, inode, path + depth); + return err; +} + +/* + * ext4_split_extents() splits an extent and mark extent which is covered + * by @map as split_flags indicates + * + * It may result in splitting the extent into multiple extents (upto three) + * There are three possibilities: + * a> There is no split required + * b> Splits in two extents: Split is happening at either end of the extent + * c> Splits in three extents: Somone is splitting in middle of the extent + * + */ +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags) +{ + ext4_lblk_t ee_block; + struct ext4_extent *ex; + unsigned int ee_len, depth; + int err = 0; + int uninitialized; + int split_flag1, flags1; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + uninitialized = ext4_ext_is_uninitialized(ex); + + if (map->m_lblk + map->m_len < ee_block + ee_len) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk + map->m_len, split_flag1, flags1); + if (err) + goto out; + } + + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); + if (IS_ERR(path)) + return PTR_ERR(path); + + if (map->m_lblk >= ee_block) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (split_flag & EXT4_EXT_MARK_UNINIT2) + split_flag1 |= EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk, split_flag1, flags); + if (err) + goto out; + } + + ext4_ext_show_leaf(inode, path); +out: + return err ? err : map->m_len; +} + #define EXT4_EXT_ZERO_LEN 7 /* * This function is called by ext4_ext_map_blocks() if someone tries to write @@ -2545,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_map_blocks *map, struct ext4_ext_path *path) { - struct ext4_extent *ex, newex, orig_ex; - struct ext4_extent *ex1 = NULL; - struct ext4_extent *ex2 = NULL; - struct ext4_extent *ex3 = NULL; - struct ext4_extent_header *eh; + struct ext4_map_blocks split_map; + struct ext4_extent zero_ex; + struct ext4_extent *ex; ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; - ext4_fsblk_t newblock; int err = 0; - int ret = 0; - int may_zeroout; + int split_flag = 0; ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -2567,280 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); - eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (map->m_lblk - ee_block); - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - - ex2 = ex; - orig_ex.ee_block = ex->ee_block; - orig_ex.ee_len = cpu_to_le16(ee_len); - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); + WARN_ON(map->m_lblk < ee_block); /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully insde i_size or new_size. */ - may_zeroout = ee_block + ee_len <= eof_block; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, ex); if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - return allocated; - } - - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ - if (map->m_lblk > ee_block) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * for sanity, update the length of the ex2 extent before - * we insert ex3, if ex1 is NULL. This is to avoid temporary - * overlap of blocks. - */ - if (!ex1 && allocated > map->m_len) - ex2->ee_len = cpu_to_le16(map->m_len); - /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > map->m_len) { - unsigned int newdepth; - /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ - if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { - /* - * map->m_lblk == ee_block is handled by the zerouout - * at the beginning. - * Mark first half uninitialized. - * Mark second half initialized and zero out the - * initialized extent - */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = cpu_to_le16(ee_len - allocated); - ext4_ext_mark_uninitialized(ex); - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex3, newblock); - ex3->ee_len = cpu_to_le16(allocated); - err = ext4_ext_insert_extent(handle, inode, path, - ex3, 0); - if (err == -ENOSPC) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, - ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - - /* - * We need to zero out the second half because - * an fallocate request can update file size and - * converting the second half to initialized extent - * implies that we can leak some junk data to user - * space. - */ - err = ext4_ext_zeroout(inode, ex3); - if (err) { - /* - * We should actually mark the - * second half as uninit and return error - * Insert would have changed the extent - */ - depth = ext_depth(inode); - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, - path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - return err; - } - /* get the second half extent details */ - ex = path[depth].p_ext; - err = ext4_ext_get_access(handle, inode, - path + depth); - if (err) - return err; - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; - } - - /* zeroed the second half */ - return allocated; - } - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); - ext4_ext_store_pblock(ex3, newblock + map->m_len); - ex3->ee_len = cpu_to_le16(allocated - map->m_len); - ext4_ext_mark_uninitialized(ex3); - err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - /* - * The depth, and hence eh & ex might change - * as part of the insert above. - */ - newdepth = ext_depth(inode); - /* - * update the extent length after successful insert of the - * split extent - */ - ee_len -= ext4_ext_get_actual_len(ex3); - orig_ex.ee_len = cpu_to_le16(ee_len); - may_zeroout = ee_block + ee_len <= eof_block; - - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); goto out; - } - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - - allocated = map->m_len; - - /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying - * to insert a extent in the middle zerout directly - * otherwise give the extent a chance to merge to left - */ - if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - map->m_lblk != ee_block && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - /* blocks available from map->m_lblk */ - return allocated; - } - } - /* - * If there was a change of depth as part of the - * insertion of ex3 above, we need to update the length - * of the ex1 extent again here - */ - if (ex1 && ex1 != ex) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ - ex2->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex2, newblock); - ex2->ee_len = cpu_to_le16(allocated); - if (ex2 != ex) - goto insert; - /* - * New (initialized) extent starts from the first block - * in the current extent. i.e., ex2 == ex - * We have to see if it can be merged with the extent - * on the left. - */ - if (ex2 > EXT_FIRST_EXTENT(eh)) { - /* - * To merge left, pass "ex2 - 1" to try_to_merge(), - * since it merges towards right _only_. - */ - ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - ex2--; - } + ext4_ext_mark_initialized(ex); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; } + /* - * Try to Merge towards right. This might be required - * only when the whole extent is being written to. - * i.e. ex2 == ex and ex3 == NULL. + * four cases: + * 1. split the extent into three extents. + * 2. split the extent into two extents, zeroout the first half. + * 3. split the extent into two extents, zeroout the second half. + * 4. split the extent into two extents with out zeroout. */ - if (!ex3) { - ret = ext4_ext_try_to_merge(inode, path, ex2); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); + split_map.m_lblk = map->m_lblk; + split_map.m_len = map->m_len; + + if (allocated > map->m_len) { + if (allocated <= EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 3 */ + zero_ex.ee_block = + cpu_to_le32(map->m_lblk); + zero_ex.ee_len = cpu_to_le16(allocated); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex) + map->m_lblk - ee_block); + err = ext4_ext_zeroout(inode, &zero_ex); if (err) goto out; + split_map.m_lblk = map->m_lblk; + split_map.m_len = allocated; + } else if ((map->m_lblk - ee_block + map->m_len < + EXT4_EXT_ZERO_LEN) && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 2 */ + if (map->m_lblk != ee_block) { + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = cpu_to_le16(map->m_lblk - + ee_block); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex)); + err = ext4_ext_zeroout(inode, &zero_ex); + if (err) + goto out; + } + + split_map.m_lblk = ee_block; + split_map.m_len = map->m_lblk - ee_block + map->m_ |