From 78fb9cdf035d88a39a5e5f83bb8788e4fe7c1f72 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: remove unused code from ext4_remove_blocks() The "head removal" branch in the condition is never used in any code path in ext4 since the function only caller ext4_ext_rm_leaf() will make sure that the extent is properly split before removing blocks. Note that there is a bug in this branch anyway. This commit removes the unused code completely and makes use of ext4_error() instead of printk if dubious range is provided. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9c..18c3f1a131b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2432,23 +2432,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, *partial_cluster = EXT4_B2C(sbi, pblk); else *partial_cluster = 0; - } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - /* head removal */ - ext4_lblk_t num; - ext4_fsblk_t start; - - num = to - from; - start = ext4_ext_pblock(ex); - - ext_debug("free first %u blocks starting %llu\n", num, start); - ext4_free_blocks(handle, inode, NULL, start, num, flags); - - } else { - printk(KERN_INFO "strange request: removal(2) " - "%u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); - } + } else + ext4_error(sbi->s_sb, "strange request: removal(2) " + "%u-%u from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), ee_len); return 0; } -- cgit v1.2.3-18-g5258 From 61801325f790ea15ba0630a7a26bd80a0741813f Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: update ext4_ext_remove_space trace point Add "end" variable. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 18c3f1a131b..fb9b41483c8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2663,7 +2663,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, return PTR_ERR(handle); again: - trace_ext4_ext_remove_space(inode, start, depth); + trace_ext4_ext_remove_space(inode, start, end, depth); /* * Check if we are removing extents inside the extent tree. If that @@ -2831,8 +2831,8 @@ again: } } - trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, - path->p_hdr->eh_entries); + trace_ext4_ext_remove_space_done(inode, start, end, depth, + partial_cluster, path->p_hdr->eh_entries); /* If we still have something in the partial cluster and we have removed * even the first extent, then we should free the blocks in the partial -- cgit v1.2.3-18-g5258 From d23142c6271c499d913d0d5e668b5a4eb6dafcb0 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:33:35 -0400 Subject: ext4: make punch hole code path work with bigalloc Currently punch hole is disabled in file systems with bigalloc feature enabled. However the recent changes in punch hole patch should make it easier to support punching holes on bigalloc enabled file systems. This commit changes partial_cluster handling in ext4_remove_blocks(), ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently partial_cluster is unsigned long long type and it makes sure that we will free the partial cluster if all extents has been released from that cluster. However it has been specifically designed only for truncate. With punch hole we can be freeing just some extents in the cluster leaving the rest untouched. So we have to make sure that we will notice cluster which still has some extents. To do this I've changed partial_cluster to be signed long long type. The only scenario where this could be a problem is when cluster_size == block size, however in that case there would not be any partial clusters so we're safe. For bigger clusters the signed type is enough. Now we use the negative value in partial_cluster to mark such cluster used, hence we know that we must not free it even if all other extents has been freed from such cluster. This scenario can be described in simple diagram: |FFF...FF..FF.UUU| ^----------^ punch hole . - free space | - cluster boundary F - freed extent U - used extent Also update respective tracepoints to use signed long long type for partial_cluster. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 69 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 18 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fb9b41483c8..214e68a5e79 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2359,7 +2359,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, - ext4_fsblk_t *partial_cluster, + long long *partial_cluster, ext4_lblk_t from, ext4_lblk_t to) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2388,7 +2388,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * partial cluster here. */ pblk = ext4_ext_pblock(ex) + ee_len - 1; - if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { + if ((*partial_cluster > 0) && + (EXT4_B2C(sbi, pblk) != *partial_cluster)) { ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, *partial_cluster), sbi->s_cluster_ratio, flags); @@ -2414,23 +2415,41 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { /* tail removal */ ext4_lblk_t num; + unsigned int unaligned; num = le32_to_cpu(ex->ee_block) + ee_len - from; pblk = ext4_ext_pblock(ex) + ee_len - num; - ext_debug("free last %u blocks starting %llu\n", num, pblk); + /* + * Usually we want to free partial cluster at the end of the + * extent, except for the situation when the cluster is still + * used by any other extent (partial_cluster is negative). + */ + if (*partial_cluster < 0 && + -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) + flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; + + ext_debug("free last %u blocks starting %llu partial %lld\n", + num, pblk, *partial_cluster); ext4_free_blocks(handle, inode, NULL, pblk, num, flags); /* * If the block range to be freed didn't start at the * beginning of a cluster, and we removed the entire - * extent, save the partial cluster here, since we - * might need to delete if we determine that the - * truncate operation has removed all of the blocks in - * the cluster. + * extent and the cluster is not used by any other extent, + * save the partial cluster here, since we might need to + * delete if we determine that the truncate operation has + * removed all of the blocks in the cluster. + * + * On the other hand, if we did not manage to free the whole + * extent, we have to mark the cluster as used (store negative + * cluster number in partial_cluster). */ - if (pblk & (sbi->s_cluster_ratio - 1) && - (ee_len == num)) + unaligned = pblk & (sbi->s_cluster_ratio - 1); + if (unaligned && (ee_len == num) && + (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) *partial_cluster = EXT4_B2C(sbi, pblk); - else + else if (unaligned) + *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); + else if (*partial_cluster > 0) *partial_cluster = 0; } else ext4_error(sbi->s_sb, "strange request: removal(2) " @@ -2448,12 +2467,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * @handle: The journal handle * @inode: The files inode * @path: The path to the leaf + * @partial_cluster: The cluster which we'll have to free if all extents + * has been released from it. It gets negative in case + * that the cluster is still used. * @start: The first block to remove * @end: The last block to remove */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, + struct ext4_ext_path *path, + long long *partial_cluster, ext4_lblk_t start, ext4_lblk_t end) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2466,6 +2489,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; + ext4_fsblk_t pblk; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %u in leaf to %u\n", start, end); @@ -2504,6 +2528,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* If this extent is beyond the end of the hole, skip it */ if (end < ex_ee_block) { + /* + * We're going to skip this extent and move to another, + * so if this extent is not cluster aligned we have + * to mark the current cluster as used to avoid + * accidentally freeing it later on + */ + pblk = ext4_ext_pblock(ex); + if (pblk & (sbi->s_cluster_ratio - 1)) + *partial_cluster = + -((long long)EXT4_B2C(sbi, pblk)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); @@ -2579,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent)); } le16_add_cpu(&eh->eh_entries, -1); - } else + } else if (*partial_cluster > 0) *partial_cluster = 0; err = ext4_ext_dirty(handle, inode, path + depth); @@ -2597,11 +2631,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, err = ext4_ext_correct_indexes(handle, inode, path); /* - * If there is still a entry in the leaf node, check to see if - * it references the partial cluster. This is the only place - * where it could; if it doesn't, we can free the cluster. + * Free the partial cluster only if the current extent does not + * reference it. Otherwise we might free used cluster. */ - if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && + if (*partial_cluster > 0 && (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != *partial_cluster)) { int flags = EXT4_FREE_BLOCKS_FORGET; @@ -2651,7 +2684,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); struct ext4_ext_path *path = NULL; - ext4_fsblk_t partial_cluster = 0; + long long partial_cluster = 0; handle_t *handle; int i = 0, err = 0; @@ -2837,7 +2870,7 @@ again: /* If we still have something in the partial cluster and we have removed * even the first extent, then we should free the blocks in the partial * cluster as well. */ - if (partial_cluster && path->p_hdr->eh_entries == 0) { + if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) -- cgit v1.2.3-18-g5258 From a60697f411eb365fb09e639e6f183fe33d1eb796 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:38:56 -0400 Subject: ext4: fix data offset overflow in ext4_xattr_fiemap() on 32-bit archs On 32-bit architectures with 32-bit sector_t computation of data offset in ext4_xattr_fiemap() can overflow resulting in reporting bogus data location. Fix the problem by typing block number to proper type before shifting. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 214e68a5e79..299ee9df546 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4679,7 +4679,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4687,7 +4687,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } -- cgit v1.2.3-18-g5258 From fffb273997cc52f255bde5f18e7f6b4686c914fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:01:11 -0400 Subject: ext4: better estimate credits needed for ext4_da_writepages() We limit the number of blocks written in a single loop of ext4_da_writepages() to 64 when inode uses indirect blocks. That is unnecessary as credit estimates for mapping logically continguous run of blocks is rather low even for inode with indirect blocks. So just lift this limitation and properly calculate the number of necessary credits. This better credit estimate will also later allow us to always write at least a single page in one iteration. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 299ee9df546..94283d06cac 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2328,17 +2328,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, } /* - * How many index/leaf blocks need to change/allocate to modify nrblocks? + * How many index/leaf blocks need to change/allocate to add @extents extents? * - * if nrblocks are fit in a single extent (chunk flag is 1), then - * in the worse case, each tree level index/leaf need to be changed - * if the tree split due to insert a new extent, then the old tree - * index/leaf need to be updated too + * If we add a single extent, then in the worse case, each tree level + * index/leaf need to be changed in case of the tree split. * - * If the nrblocks are discontiguous, they could cause - * the whole tree split more than once, but this is really rare. + * If more extents are inserted, they could cause the whole tree split more + * than once, but this is really rare. */ -int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +int ext4_ext_index_trans_blocks(struct inode *inode, int extents) { int index; int depth; @@ -2349,7 +2347,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) depth = ext_depth(inode); - if (chunk) + if (extents <= 1) index = depth * 2; else index = depth * 3; -- cgit v1.2.3-18-g5258 From 6b523df4fb5ae281ddbc817f40504b33e6226554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:21:11 -0400 Subject: ext4: use transaction reservation for extent conversion in ext4_end_io Later we would like to clear PageWriteback bit only after extent conversion from unwritten to written extents is performed. However it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and is easy to deadlock). So we have to reserve a transaction before locking pages and sending them for IO and later we use the transaction for extent conversion from ext4_end_io(). Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94283d06cac..208f664f9ee 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4566,10 +4566,9 @@ retry: * function, to convert the fallocated extents after IO is completed. * Returns 0 on success. */ -int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len) +int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len) { - handle_t *handle; unsigned int max_blocks; int ret = 0; int ret2 = 0; @@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - map.m_lblk); /* - * credits to insert 1 extent into extent tree + * This is somewhat ugly but the idea is clear: When transaction is + * reserved, everything goes into it. Otherwise we rather start several + * smaller transactions for conversion of each extent separately. */ - credits = ext4_chunk_trans_blocks(inode, max_blocks); + if (handle) { + handle = ext4_journal_start_reserved(handle, + EXT4_HT_EXT_CONVERT); + if (IS_ERR(handle)) + return PTR_ERR(handle); + credits = 0; + } else { + /* + * credits to insert 1 extent into extent tree + */ + credits = ext4_chunk_trans_blocks(inode, max_blocks); + } while (ret >= 0 && ret < max_blocks) { map.m_lblk += ret; map.m_len = (max_blocks -= ret); - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - break; + if (credits) { + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, + credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + break; + } } ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); @@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, inode->i_ino, map.m_lblk, map.m_len, ret); ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - if (ret <= 0 || ret2 ) + if (credits) + ret2 = ext4_journal_stop(handle); + if (ret <= 0 || ret2) break; } + if (!credits) + ret2 = ext4_journal_stop(handle); return ret > 0 ? ret2 : ret; } -- cgit v1.2.3-18-g5258 From 981250ca89261f98bdfd2d6be1fcccb96cbbc00d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 12 Jun 2013 11:48:29 -0400 Subject: ext4: don't use EXT4_FREE_BLOCKS_FORGET unnecessarily Commit 18888cf0883c: "ext4: speed up truncate/unlink by not using bforget() unless needed" removed the use of EXT4_FREE_BLOCKS_FORGET in the most important codepath for file systems using extents, but a similar optimization also can be done for file systems using indirect blocks, and for the two special cases in the ext4 extents code. Cc: Andrey Sidorov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 208f664f9ee..d04f4093639 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2355,6 +2355,15 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents) return index; } +static inline int get_default_free_blocks_flags(struct inode *inode) +{ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; + else if (ext4_should_journal_data(inode)) + return EXT4_FREE_BLOCKS_FORGET; + return 0; +} + static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, long long *partial_cluster, @@ -2363,12 +2372,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned short ee_len = ext4_ext_get_actual_len(ex); ext4_fsblk_t pblk; - int flags = 0; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; - else if (ext4_should_journal_data(inode)) - flags |= EXT4_FREE_BLOCKS_FORGET; + int flags = get_default_free_blocks_flags(inode); /* * For bigalloc file systems, we never free a partial cluster @@ -2635,10 +2639,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (*partial_cluster > 0 && (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != *partial_cluster)) { - int flags = EXT4_FREE_BLOCKS_FORGET; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + int flags = get_default_free_blocks_flags(inode); ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, *partial_cluster), @@ -2869,10 +2870,7 @@ again: * even the first extent, then we should free the blocks in the partial * cluster as well. */ if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { - int flags = EXT4_FREE_BLOCKS_FORGET; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + int flags = get_default_free_blocks_flags(inode); ext4_free_blocks(handle, inode, NULL, EXT4_C2B(EXT4_SB(sb), partial_cluster), -- cgit v1.2.3-18-g5258 From 72dac95d4403ee7231e094e1735ecb96032e6117 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 12 Jun 2013 23:13:59 -0400 Subject: ext4: return FIEMAP_EXTENT_UNKNOWN for delalloc extents Return the FIEMAP_EXTENT_UNKNOWN flag as well except the FIEMAP_EXTENT_DELALLOC because the data location of an delayed allocation extent is unknown. Signed-off-by: Jie Liu --- fs/ext4/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d04f4093639..51e41687f51 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, next_del = ext4_find_delayed_extent(inode, &es); if (!exists && next_del) { exists = 1; - flags |= FIEMAP_EXTENT_DELALLOC; + flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); } up_read(&EXT4_I(inode)->i_data_sem); -- cgit v1.2.3-18-g5258 From aeb2817a4ea99f62532adf3377be3b282d3bda12 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: ext4: pass inode pointer instead of file pointer to punch hole No need to pass file pointer when we can directly pass inode pointer. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 51e41687f51..937593e2f00 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4463,7 +4463,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - return ext4_punch_hole(file, offset, len); + return ext4_punch_hole(inode, offset, len); ret = ext4_convert_inline_data(inode); if (ret) -- cgit v1.2.3-18-g5258 From 21ddd568c133024196d394c43923f55cad1e7bd0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:40 -0400 Subject: ext4: translate flag bits to strings in tracepoints Translate the bitfields used in various flags argument to strings to make the tracepoint output more human-readable. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 937593e2f00..575faa09025 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4380,7 +4380,7 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated); + trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); return err ? err : allocated; } -- cgit v1.2.3-18-g5258 From 6ae06ff51eab5dcbbf959b05ce0f11003a305ba5 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 1 Jul 2013 08:12:41 -0400 Subject: ext4: optimize starting extent in ext4_ext_rm_leaf() Both hole punch and truncate use ext4_ext_rm_leaf() for removing blocks. Currently we choose the last extent as the starting point for removing blocks: ex = EXT_LAST_EXTENT(eh); This is OK for truncate but for hole punch we can optimize the extent selection as the path is already initialized. We could use this information to select proper starting extent. The code change in this patch will not affect truncate as for truncate path[depth].p_ext will always be NULL. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 575faa09025..7097b0f680e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2504,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, return -EIO; } /* find where to start removing */ - ex = EXT_LAST_EXTENT(eh); + ex = path[depth].p_ext; + if (!ex) + ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); -- cgit v1.2.3-18-g5258 From 8acd5e9b1217e58a57124d9e225afa12efeae20d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:19 -0400 Subject: ext4: fix error handling in ext4_ext_truncate() Previously ext4_ext_truncate() was ignoring potential error returns from ext4_es_remove_extent() and ext4_ext_remove_space(). This can lead to the on-diks extent tree and the extent status tree cache getting out of sync, which is particuarlly bad, and can lead to file system corruption and potential data loss. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7097b0f680e..f57cc0e7f1b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4405,9 +4405,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, -- cgit v1.2.3-18-g5258 From c8e15130e1636f68d5165aa2605b8e9cba0f644c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:37 -0400 Subject: ext4: simplify calculation of blocks to free on error In ext4_ext_map_blocks(), if we have successfully allocated the data blocks, but then run into trouble inserting the extent into the extent tree, most likely due to an ENOSPC condition, determine the arguments to ext4_free_blocks() in a simpler way which is easier to prove to be correct. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f57cc0e7f1b..593091537e7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4261,8 +4261,8 @@ got_allocated_blocks: /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), fb_flags); goto out2; } -- cgit v1.2.3-18-g5258 From 76828c882630ced08b5ddce22cc0095b05de9bc5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 12:27:47 -0400 Subject: ext4: yield during large unlinks During large unlink operations on files with extents, we can use a lot of CPU time. This adds a cond_resched() call when starting to examine the next level of a multi-level extent tree. Multi-level extent trees are rare in the first place, and this should rarely be executed. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 593091537e7..cfdc51e3025 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2835,6 +2835,9 @@ again: err = -EIO; break; } + /* Yield here to deal with large extent trees. + * Should be a no-op if we did IO above. */ + cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EIO; break; -- cgit v1.2.3-18-g5258 From 63b999685cb372e24eb73f255cd73547026370fd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 16 Jul 2013 10:28:47 -0400 Subject: ext4: call ext4_es_lru_add() after handling cache miss If there are no items in the extent status tree, ext4_es_lru_add() is a no-op. So it is not sufficient to call ext4_es_lru_add() before we try to lookup an entry in the extent status tree. We also need to call it at the end of ext4_ext_map_blocks(), after items have been added to the extent status tree. This could lead to inodes with that have extent status trees but which are not in the LRU list, which means they won't get considered for eviction by the es_shrinker. Signed-off-by: "Theodore Ts'o" Cc: Zheng Liu Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cfdc51e3025..a61873808f7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4385,8 +4385,9 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); - + trace_ext4_ext_map_blocks_exit(inode, flags, map, + err ? err : allocated); + ext4_es_lru_add(inode); return err ? err : allocated; } -- cgit v1.2.3-18-g5258 From 94eec0fc3520c759831763d866421b4d60b599b4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 29 Jul 2013 12:12:56 -0400 Subject: ext4: fix retry handling in ext4_ext_truncate() We tested for ENOMEM instead of -ENOMEM. Oops. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a61873808f7..72ba4705d4f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4412,7 +4412,7 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); - if (err == ENOMEM) { + if (err == -ENOMEM) { cond_resched(); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; -- cgit v1.2.3-18-g5258