diff options
Diffstat (limited to 'fs/ext4/extents_status.c')
| -rw-r--r-- | fs/ext4/extents_status.c | 261 |
1 files changed, 177 insertions, 84 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index ee018d5f397..0b7e28e7eaa 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -13,7 +13,6 @@ #include <linux/list_sort.h> #include "ext4.h" #include "extents_status.h" -#include "ext4_extents.h" #include <trace/events/ext4.h> @@ -148,6 +147,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t end); static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, int nr_to_scan); +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei); int __init ext4_init_es(void) { @@ -183,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode) while (node) { struct extent_status *es; es = rb_entry(node, struct extent_status, rb_node); - printk(KERN_DEBUG " [%u/%u) %llu %llx", + printk(KERN_DEBUG " [%u/%u) %llu %x", es->es_lblk, es->es_len, ext4_es_pblock(es), ext4_es_status(es)); node = rb_next(node); @@ -261,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode, if (tree->cache_es) { es1 = tree->cache_es; if (in_range(lblk, es1->es_lblk, es1->es_len)) { - es_debug("%u cached by [%u/%u) %llu %llx\n", + es_debug("%u cached by [%u/%u) %llu %x\n", lblk, es1->es_lblk, es1->es_len, ext4_es_pblock(es1), ext4_es_status(es1)); goto out; @@ -343,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1, if (ext4_es_status(es1) != ext4_es_status(es2)) return 0; - if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) + if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { + pr_warn("ES assertion failed when merging extents. " + "The sum of lengths of es1 (%d) and es2 (%d) " + "is bigger than allowed file size (%d)\n", + es1->es_len, es2->es_len, EXT_MAX_BLOCKS); + WARN_ON(1); return 0; + } if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) return 0; @@ -407,6 +414,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) } #ifdef ES_AGGRESSIVE_TEST +#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */ + static void ext4_es_insert_extent_ext_check(struct inode *inode, struct extent_status *es) { @@ -417,7 +426,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, unsigned short ee_len; int depth, ee_status, es_status; - path = ext4_ext_find_extent(inode, es->es_lblk, NULL); + path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) return; @@ -430,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, ee_start = ext4_ext_pblock(ex); ee_len = ext4_ext_get_actual_len(ex); - ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; + ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0; es_status = ext4_es_is_unwritten(es) ? 1 : 0; /* @@ -439,11 +448,11 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { if (in_range(es->es_lblk, ee_block, ee_len)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu we can find an extent " "at block [%d/%d/%llu/%c], but we " - "want to add an delayed/hole extent " - "[%d/%d/%llu/%llx]\n", + "want to add a delayed/hole extent " + "[%d/%d/%llu/%x]\n", inode->i_ino, ee_block, ee_len, ee_start, ee_status ? 'u' : 'w', es->es_lblk, es->es_len, @@ -458,7 +467,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (es->es_lblk < ee_block || ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -468,7 +477,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, } if (ee_status ^ es_status) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -481,10 +490,10 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, * that we don't want to add an written/unwritten extent. */ if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "can't find an extent at block %d but we want " - "to add an written/unwritten extent " - "[%d/%d/%llu/%llx]\n", inode->i_ino, + "to add a written/unwritten extent " + "[%d/%d/%llu/%x]\n", inode->i_ino, es->es_lblk, es->es_lblk, es->es_len, ext4_es_pblock(es), ext4_es_status(es)); } @@ -519,21 +528,21 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, * We want to add a delayed/hole extent but this * block has been allocated. */ - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can find blocks but we want to add a " - "delayed/hole extent [%d/%d/%llu/%llx]\n", + "delayed/hole extent [%d/%d/%llu/%x]\n", inode->i_ino, es->es_lblk, es->es_len, ext4_es_pblock(es), ext4_es_status(es)); return; } else if (ext4_es_is_written(es)) { if (retval != es->es_len) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu retval %d != es_len %d\n", inode->i_ino, retval, es->es_len); return; } if (map.m_pblk != ext4_es_pblock(es)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu m_pblk %llu != " "es_pblk %llu\n", inode->i_ino, map.m_pblk, @@ -549,9 +558,9 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, } } else if (retval == 0) { if (ext4_es_is_written(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can't find the block but we want to add " - "an written extent [%d/%d/%llu/%llx]\n", + "a written extent [%d/%d/%llu/%x]\n", inode->i_ino, es->es_lblk, es->es_len, ext4_es_pblock(es), ext4_es_status(es)); return; @@ -632,22 +641,20 @@ out: } /* - * ext4_es_insert_extent() adds a space to a extent status tree. - * - * ext4_es_insert_extent is called by ext4_da_write_begin and - * ext4_es_remove_extent. + * ext4_es_insert_extent() adds information to an inode's extent + * status tree. * * Return 0 on success, error code on failure. */ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned long long status) + unsigned int status) { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; int err = 0; - es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", + es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", lblk, len, pblk, status, inode->i_ino); if (!len) @@ -657,8 +664,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, newes.es_lblk = lblk; newes.es_len = len; - ext4_es_store_pblock(&newes, pblk); - ext4_es_store_status(&newes, status); + ext4_es_store_pblock_status(&newes, pblk, status); trace_ext4_es_insert_extent(inode, &newes); ext4_es_insert_extent_check(inode, &newes); @@ -667,7 +673,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, err = __es_remove_extent(inode, lblk, end); if (err != 0) goto error; +retry: err = __es_insert_extent(inode, &newes); + if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; + if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) + err = 0; error: write_unlock(&EXT4_I(inode)->i_es_lock); @@ -678,6 +690,37 @@ error: } /* + * ext4_es_cache_extent() inserts information into the extent status + * tree if and only if there isn't information about the range in + * question already. + */ +void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status) +{ + struct extent_status *es; + struct extent_status newes; + ext4_lblk_t end = lblk + len - 1; + + newes.es_lblk = lblk; + newes.es_len = len; + ext4_es_store_pblock_status(&newes, pblk, status); + trace_ext4_es_cache_extent(inode, &newes); + + if (!len) + return; + + BUG_ON(end < lblk); + + write_lock(&EXT4_I(inode)->i_es_lock); + + es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); + if (!es || es->es_lblk > end) + __es_insert_extent(inode, &newes); + write_unlock(&EXT4_I(inode)->i_es_lock); +} + +/* * ext4_es_lookup_extent() looks up an extent in extent status tree. * * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. @@ -746,8 +789,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status orig_es; ext4_lblk_t len1, len2; ext4_fsblk_t block; - int err = 0; + int err; +retry: + err = 0; es = __es_tree_search(&tree->root, lblk); if (!es) goto out; @@ -771,17 +816,21 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, newes.es_lblk = end + 1; newes.es_len = len2; + block = 0x7FDEADBEEFULL; if (ext4_es_is_written(&orig_es) || - ext4_es_is_unwritten(&orig_es)) { + ext4_es_is_unwritten(&orig_es)) block = ext4_es_pblock(&orig_es) + orig_es.es_len - len2; - ext4_es_store_pblock(&newes, block); - } - ext4_es_store_status(&newes, ext4_es_status(&orig_es)); + ext4_es_store_pblock_status(&newes, block, + ext4_es_status(&orig_es)); err = __es_insert_extent(inode, &newes); if (err) { es->es_lblk = orig_es.es_lblk; es->es_len = orig_es.es_len; + if ((err == -ENOMEM) && + __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; goto out; } } else { @@ -859,23 +908,6 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, return err; } -int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) -{ - ext4_lblk_t ee_block; - ext4_fsblk_t ee_pblock; - unsigned int ee_len; - - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - ee_pblock = ext4_ext_pblock(ex); - - if (ee_len == 0) - return 0; - - return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, - EXTENT_STATUS_WRITTEN); -} - static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, struct list_head *b) { @@ -883,6 +915,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, eia = list_entry(a, struct ext4_inode_info, i_es_lru); eib = list_entry(b, struct ext4_inode_info, i_es_lru); + if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && + !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) + return 1; + if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && + ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) + return -1; if (eia->i_touch_when == eib->i_touch_when) return 0; if (time_after(eia->i_touch_when, eib->i_touch_when)) @@ -891,35 +929,21 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, return -1; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei) { - struct ext4_sb_info *sbi = container_of(shrink, - struct ext4_sb_info, s_es_shrinker); struct ext4_inode_info *ei; struct list_head *cur, *tmp; - LIST_HEAD(skiped); - int nr_to_scan = sc->nr_to_scan; - int ret, nr_shrunk = 0; - - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); - - if (!nr_to_scan) - return ret; + LIST_HEAD(skipped); + int nr_shrunk = 0; + int retried = 0, skip_precached = 1, nr_skipped = 0; spin_lock(&sbi->s_es_lru_lock); - /* - * If the inode that is at the head of LRU list is newer than - * last_sorted time, that means that we need to sort this list. - */ - ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); - if (sbi->s_es_last_sorted < ei->i_touch_when) { - list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); - sbi->s_es_last_sorted = jiffies; - } - +retry: list_for_each_safe(cur, tmp, &sbi->s_es_lru) { + int shrunk; + /* * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. @@ -929,34 +953,96 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) ei = list_entry(cur, struct ext4_inode_info, i_es_lru); - /* Skip the inode that is newer than the last_sorted time */ - if (sbi->s_es_last_sorted < ei->i_touch_when) { - list_move_tail(cur, &skiped); + /* + * Skip the inode that is newer than the last_sorted + * time. Normally we try hard to avoid shrinking + * precached inodes, but we will as a last resort. + */ + if ((sbi->s_es_last_sorted < ei->i_touch_when) || + (skip_precached && ext4_test_inode_state(&ei->vfs_inode, + EXT4_STATE_EXT_PRECACHED))) { + nr_skipped++; + list_move_tail(cur, &skipped); continue; } - if (ei->i_es_lru_nr == 0) + if (ei->i_es_lru_nr == 0 || ei == locked_ei || + !write_trylock(&ei->i_es_lock)) continue; - write_lock(&ei->i_es_lock); - ret = __es_try_to_reclaim_extents(ei, nr_to_scan); + shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); if (ei->i_es_lru_nr == 0) list_del_init(&ei->i_es_lru); write_unlock(&ei->i_es_lock); - nr_shrunk += ret; - nr_to_scan -= ret; + nr_shrunk += shrunk; + nr_to_scan -= shrunk; if (nr_to_scan == 0) break; } /* Move the newer inodes into the tail of the LRU list. */ - list_splice_tail(&skiped, &sbi->s_es_lru); + list_splice_tail(&skipped, &sbi->s_es_lru); + INIT_LIST_HEAD(&skipped); + + /* + * If we skipped any inodes, and we weren't able to make any + * forward progress, sort the list and try again. + */ + if ((nr_shrunk == 0) && nr_skipped && !retried) { + retried++; + list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); + sbi->s_es_last_sorted = jiffies; + ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, + i_es_lru); + /* + * If there are no non-precached inodes left on the + * list, start releasing precached extents. + */ + if (ext4_test_inode_state(&ei->vfs_inode, + EXT4_STATE_EXT_PRECACHED)) + skip_precached = 0; + goto retry; + } + spin_unlock(&sbi->s_es_lru_lock); + if (locked_ei && nr_shrunk == 0) + nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); + + return nr_shrunk; +} + +static unsigned long ext4_es_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr; + struct ext4_sb_info *sbi; + + sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); + return nr; +} + +static unsigned long ext4_es_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct ext4_sb_info *sbi = container_of(shrink, + struct ext4_sb_info, s_es_shrinker); + int nr_to_scan = sc->nr_to_scan; + int ret, nr_shrunk; + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); + + if (!nr_to_scan) + return ret; + + nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); + trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; + return nr_shrunk; } void ext4_es_register_shrinker(struct ext4_sb_info *sbi) @@ -964,7 +1050,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); sbi->s_es_last_sorted = 0; - sbi->s_es_shrinker.shrink = ext4_es_shrink; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; + sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&sbi->s_es_shrinker); } @@ -1008,11 +1095,17 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, struct ext4_es_tree *tree = &ei->i_es_tree; struct rb_node *node; struct extent_status *es; - int nr_shrunk = 0; + unsigned long nr_shrunk = 0; + static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); if (ei->i_es_lru_nr == 0) return 0; + if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && + __ratelimit(&_rs)) + ext4_warning(inode->i_sb, "forced shrink of precached extents"); + node = rb_first(&tree->root); while (node != NULL) { es = rb_entry(node, struct extent_status, rb_node); |
