aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/extents_status.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--fs/ext4/extents_status.c294
1 files changed, 211 insertions, 83 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e6941e622d3..0b7e28e7eaa 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -10,9 +10,9 @@
* Ext4 extents status tree core functions.
*/
#include <linux/rbtree.h>
+#include <linux/list_sort.h>
#include "ext4.h"
#include "extents_status.h"
-#include "ext4_extents.h"
#include <trace/events/ext4.h>
@@ -147,6 +147,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan);
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei);
int __init ext4_init_es(void)
{
@@ -182,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode)
while (node) {
struct extent_status *es;
es = rb_entry(node, struct extent_status, rb_node);
- printk(KERN_DEBUG " [%u/%u) %llu %llx",
+ printk(KERN_DEBUG " [%u/%u) %llu %x",
es->es_lblk, es->es_len,
ext4_es_pblock(es), ext4_es_status(es));
node = rb_next(node);
@@ -260,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
if (tree->cache_es) {
es1 = tree->cache_es;
if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %llx\n",
+ es_debug("%u cached by [%u/%u) %llu %x\n",
lblk, es1->es_lblk, es1->es_len,
ext4_es_pblock(es1), ext4_es_status(es1));
goto out;
@@ -291,7 +293,6 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
- ext4_es_lru_add(inode);
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
}
@@ -343,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
if (ext4_es_status(es1) != ext4_es_status(es2))
return 0;
- if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
+ if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
+ pr_warn("ES assertion failed when merging extents. "
+ "The sum of lengths of es1 (%d) and es2 (%d) "
+ "is bigger than allowed file size (%d)\n",
+ es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
+ WARN_ON(1);
return 0;
+ }
if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
return 0;
@@ -407,6 +414,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
}
#ifdef ES_AGGRESSIVE_TEST
+#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */
+
static void ext4_es_insert_extent_ext_check(struct inode *inode,
struct extent_status *es)
{
@@ -417,7 +426,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
unsigned short ee_len;
int depth, ee_status, es_status;
- path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+ path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return;
@@ -430,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
ee_start = ext4_ext_pblock(ex);
ee_len = ext4_ext_get_actual_len(ex);
- ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
+ ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
es_status = ext4_es_is_unwritten(es) ? 1 : 0;
/*
@@ -439,11 +448,11 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
*/
if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
if (in_range(es->es_lblk, ee_block, ee_len)) {
- pr_warn("ES insert assertation failed for "
+ pr_warn("ES insert assertion failed for "
"inode: %lu we can find an extent "
"at block [%d/%d/%llu/%c], but we "
- "want to add an delayed/hole extent "
- "[%d/%d/%llu/%llx]\n",
+ "want to add a delayed/hole extent "
+ "[%d/%d/%llu/%x]\n",
inode->i_ino, ee_block, ee_len,
ee_start, ee_status ? 'u' : 'w',
es->es_lblk, es->es_len,
@@ -458,7 +467,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
*/
if (es->es_lblk < ee_block ||
ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
- pr_warn("ES insert assertation failed for inode: %lu "
+ pr_warn("ES insert assertion failed for inode: %lu "
"ex_status [%d/%d/%llu/%c] != "
"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
ee_block, ee_len, ee_start,
@@ -468,7 +477,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
}
if (ee_status ^ es_status) {
- pr_warn("ES insert assertation failed for inode: %lu "
+ pr_warn("ES insert assertion failed for inode: %lu "
"ex_status [%d/%d/%llu/%c] != "
"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
ee_block, ee_len, ee_start,
@@ -481,10 +490,10 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
* that we don't want to add an written/unwritten extent.
*/
if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
- pr_warn("ES insert assertation failed for inode: %lu "
+ pr_warn("ES insert assertion failed for inode: %lu "
"can't find an extent at block %d but we want "
- "to add an written/unwritten extent "
- "[%d/%d/%llu/%llx]\n", inode->i_ino,
+ "to add a written/unwritten extent "
+ "[%d/%d/%llu/%x]\n", inode->i_ino,
es->es_lblk, es->es_lblk, es->es_len,
ext4_es_pblock(es), ext4_es_status(es));
}
@@ -519,21 +528,21 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
* We want to add a delayed/hole extent but this
* block has been allocated.
*/
- pr_warn("ES insert assertation failed for inode: %lu "
+ pr_warn("ES insert assertion failed for inode: %lu "
"We can find blocks but we want to add a "
- "delayed/hole extent [%d/%d/%llu/%llx]\n",
+ "delayed/hole extent [%d/%d/%llu/%x]\n",
inode->i_ino, es->es_lblk, es->es_len,
ext4_es_pblock(es), ext4_es_status(es));
return;
} else if (ext4_es_is_written(es)) {
if (retval != es->es_len) {
- pr_warn("ES insert assertation failed for "
+ pr_warn("ES insert assertion failed for "
"inode: %lu retval %d != es_len %d\n",
inode->i_ino, retval, es->es_len);
return;
}
if (map.m_pblk != ext4_es_pblock(es)) {
- pr_warn("ES insert assertation failed for "
+ pr_warn("ES insert assertion failed for "
"inode: %lu m_pblk %llu != "
"es_pblk %llu\n",
inode->i_ino, map.m_pblk,
@@ -549,9 +558,9 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
}
} else if (retval == 0) {
if (ext4_es_is_written(es)) {
- pr_warn("ES insert assertation failed for inode: %lu "
+ pr_warn("ES insert assertion failed for inode: %lu "
"We can't find the block but we want to add "
- "an written extent [%d/%d/%llu/%llx]\n",
+ "a written extent [%d/%d/%llu/%x]\n",
inode->i_ino, es->es_lblk, es->es_len,
ext4_es_pblock(es), ext4_es_status(es));
return;
@@ -632,22 +641,20 @@ out:
}
/*
- * ext4_es_insert_extent() adds a space to a extent status tree.
- *
- * ext4_es_insert_extent is called by ext4_da_write_begin and
- * ext4_es_remove_extent.
+ * ext4_es_insert_extent() adds information to an inode's extent
+ * status tree.
*
* Return 0 on success, error code on failure.
*/
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned long long status)
+ unsigned int status)
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err = 0;
- es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
+ es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
if (!len)
@@ -657,8 +664,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_lblk = lblk;
newes.es_len = len;
- ext4_es_store_pblock(&newes, pblk);
- ext4_es_store_status(&newes, status);
+ ext4_es_store_pblock_status(&newes, pblk, status);
trace_ext4_es_insert_extent(inode, &newes);
ext4_es_insert_extent_check(inode, &newes);
@@ -667,18 +673,54 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
err = __es_remove_extent(inode, lblk, end);
if (err != 0)
goto error;
+retry:
err = __es_insert_extent(inode, &newes);
+ if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
+ if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
+ err = 0;
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
- ext4_es_lru_add(inode);
ext4_es_print_tree(inode);
return err;
}
/*
+ * ext4_es_cache_extent() inserts information into the extent status
+ * tree if and only if there isn't information about the range in
+ * question already.
+ */
+void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, ext4_fsblk_t pblk,
+ unsigned int status)
+{
+ struct extent_status *es;
+ struct extent_status newes;
+ ext4_lblk_t end = lblk + len - 1;
+
+ newes.es_lblk = lblk;
+ newes.es_len = len;
+ ext4_es_store_pblock_status(&newes, pblk, status);
+ trace_ext4_es_cache_extent(inode, &newes);
+
+ if (!len)
+ return;
+
+ BUG_ON(end < lblk);
+
+ write_lock(&EXT4_I(inode)->i_es_lock);
+
+ es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
+ if (!es || es->es_lblk > end)
+ __es_insert_extent(inode, &newes);
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+}
+
+/*
* ext4_es_lookup_extent() looks up an extent in extent status tree.
*
* ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
@@ -734,7 +776,6 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
- ext4_es_lru_add(inode);
trace_ext4_es_lookup_extent_exit(inode, es, found);
return found;
}
@@ -748,8 +789,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status orig_es;
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
- int err = 0;
+ int err;
+retry:
+ err = 0;
es = __es_tree_search(&tree->root, lblk);
if (!es)
goto out;
@@ -773,17 +816,21 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_lblk = end + 1;
newes.es_len = len2;
+ block = 0x7FDEADBEEFULL;
if (ext4_es_is_written(&orig_es) ||
- ext4_es_is_unwritten(&orig_es)) {
+ ext4_es_is_unwritten(&orig_es))
block = ext4_es_pblock(&orig_es) +
orig_es.es_len - len2;
- ext4_es_store_pblock(&newes, block);
- }
- ext4_es_store_status(&newes, ext4_es_status(&orig_es));
+ ext4_es_store_pblock_status(&newes, block,
+ ext4_es_status(&orig_es));
err = __es_insert_extent(inode, &newes);
if (err) {
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
+ if ((err == -ENOMEM) &&
+ __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
goto out;
}
} else {
@@ -861,85 +908,157 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
return err;
}
-int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
+static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
{
- ext4_lblk_t ee_block;
- ext4_fsblk_t ee_pblock;
- unsigned int ee_len;
+ struct ext4_inode_info *eia, *eib;
+ eia = list_entry(a, struct ext4_inode_info, i_es_lru);
+ eib = list_entry(b, struct ext4_inode_info, i_es_lru);
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
- ee_pblock = ext4_ext_pblock(ex);
-
- if (ee_len == 0)
+ if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+ !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+ return 1;
+ if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+ ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+ return -1;
+ if (eia->i_touch_when == eib->i_touch_when)
return 0;
-
- return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
- EXTENT_STATUS_WRITTEN);
+ if (time_after(eia->i_touch_when, eib->i_touch_when))
+ return 1;
+ else
+ return -1;
}
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei)
{
- struct ext4_sb_info *sbi = container_of(shrink,
- struct ext4_sb_info, s_es_shrinker);
struct ext4_inode_info *ei;
- struct list_head *cur, *tmp, scanned;
- int nr_to_scan = sc->nr_to_scan;
- int ret, nr_shrunk = 0;
-
- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
-
- if (!nr_to_scan)
- return ret;
-
- INIT_LIST_HEAD(&scanned);
+ struct list_head *cur, *tmp;
+ LIST_HEAD(skipped);
+ int nr_shrunk = 0;
+ int retried = 0, skip_precached = 1, nr_skipped = 0;
spin_lock(&sbi->s_es_lru_lock);
+
+retry:
list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
- list_move_tail(cur, &scanned);
+ int shrunk;
+
+ /*
+ * If we have already reclaimed all extents from extent
+ * status tree, just stop the loop immediately.
+ */
+ if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
+ break;
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
- read_lock(&ei->i_es_lock);
- if (ei->i_es_lru_nr == 0) {
- read_unlock(&ei->i_es_lock);
+ /*
+ * Skip the inode that is newer than the last_sorted
+ * time. Normally we try hard to avoid shrinking
+ * precached inodes, but we will as a last resort.
+ */
+ if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+ (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
+ EXT4_STATE_EXT_PRECACHED))) {
+ nr_skipped++;
+ list_move_tail(cur, &skipped);
continue;
}
- read_unlock(&ei->i_es_lock);
- write_lock(&ei->i_es_lock);
- ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+ if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
+ !write_trylock(&ei->i_es_lock))
+ continue;
+
+ shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
+ if (ei->i_es_lru_nr == 0)
+ list_del_init(&ei->i_es_lru);
write_unlock(&ei->i_es_lock);
- nr_shrunk += ret;
- nr_to_scan -= ret;
+ nr_shrunk += shrunk;
+ nr_to_scan -= shrunk;
if (nr_to_scan == 0)
break;
}
- list_splice_tail(&scanned, &sbi->s_es_lru);
+
+ /* Move the newer inodes into the tail of the LRU list. */
+ list_splice_tail(&skipped, &sbi->s_es_lru);
+ INIT_LIST_HEAD(&skipped);
+
+ /*
+ * If we skipped any inodes, and we weren't able to make any
+ * forward progress, sort the list and try again.
+ */
+ if ((nr_shrunk == 0) && nr_skipped && !retried) {
+ retried++;
+ list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
+ sbi->s_es_last_sorted = jiffies;
+ ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
+ i_es_lru);
+ /*
+ * If there are no non-precached inodes left on the
+ * list, start releasing precached extents.
+ */
+ if (ext4_test_inode_state(&ei->vfs_inode,
+ EXT4_STATE_EXT_PRECACHED))
+ skip_precached = 0;
+ goto retry;
+ }
+
spin_unlock(&sbi->s_es_lru_lock);
- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
- return ret;
+ if (locked_ei && nr_shrunk == 0)
+ nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+
+ return nr_shrunk;
}
-void ext4_es_register_shrinker(struct super_block *sb)
+static unsigned long ext4_es_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
+ unsigned long nr;
struct ext4_sb_info *sbi;
- sbi = EXT4_SB(sb);
+ sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
+ nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+ trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+ return nr;
+}
+
+static unsigned long ext4_es_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct ext4_sb_info *sbi = container_of(shrink,
+ struct ext4_sb_info, s_es_shrinker);
+ int nr_to_scan = sc->nr_to_scan;
+ int ret, nr_shrunk;
+
+ ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+ trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+
+ if (!nr_to_scan)
+ return ret;
+
+ nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+
+ trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+ return nr_shrunk;
+}
+
+void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+{
INIT_LIST_HEAD(&sbi->s_es_lru);
spin_lock_init(&sbi->s_es_lru_lock);
- sbi->s_es_shrinker.shrink = ext4_es_shrink;
+ sbi->s_es_last_sorted = 0;
+ sbi->s_es_shrinker.scan_objects = ext4_es_scan;
+ sbi->s_es_shrinker.count_objects = ext4_es_count;
sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&sbi->s_es_shrinker);
}
-void ext4_es_unregister_shrinker(struct super_block *sb)
+void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
- unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
+ unregister_shrinker(&sbi->s_es_shrinker);
}
void ext4_es_lru_add(struct inode *inode)
@@ -947,11 +1066,14 @@ void ext4_es_lru_add(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ ei->i_touch_when = jiffies;
+
+ if (!list_empty(&ei->i_es_lru))
+ return;
+
spin_lock(&sbi->s_es_lru_lock);
if (list_empty(&ei->i_es_lru))
list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
- else
- list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
spin_unlock(&sbi->s_es_lru_lock);
}
@@ -973,11 +1095,17 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
struct ext4_es_tree *tree = &ei->i_es_tree;
struct rb_node *node;
struct extent_status *es;
- int nr_shrunk = 0;
+ unsigned long nr_shrunk = 0;
+ static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
if (ei->i_es_lru_nr == 0)
return 0;
+ if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
+ __ratelimit(&_rs))
+ ext4_warning(inode->i_sb, "forced shrink of precached extents");
+
node = rb_first(&tree->root);
while (node != NULL) {
es = rb_entry(node, struct extent_status, rb_node);