aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs31
-rw-r--r--Documentation/filesystems/f2fs.txt24
-rw-r--r--MAINTAINERS1
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/checkpoint.c195
-rw-r--r--fs/f2fs/data.c621
-rw-r--r--fs/f2fs/debug.c53
-rw-r--r--fs/f2fs/dir.c47
-rw-r--r--fs/f2fs/f2fs.h195
-rw-r--r--fs/f2fs/file.c84
-rw-r--r--fs/f2fs/gc.c22
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/inline.c222
-rw-r--r--fs/f2fs/inode.c23
-rw-r--r--fs/f2fs/namei.c5
-rw-r--r--fs/f2fs/node.c272
-rw-r--r--fs/f2fs/node.h8
-rw-r--r--fs/f2fs/recovery.c49
-rw-r--r--fs/f2fs/segment.c584
-rw-r--r--fs/f2fs/segment.h81
-rw-r--r--fs/f2fs/super.c72
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--include/linux/f2fs_fs.h7
-rw-r--r--include/trace/events/f2fs.h107
24 files changed, 1884 insertions, 825 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 31942efcaf0..32b0809203d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -24,3 +24,34 @@ Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
Description:
Controls the victim selection policy for garbage collection.
+
+What: /sys/fs/f2fs/<disk>/reclaim_segments
+Date: October 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of segment discard commands.
+
+What: /sys/fs/f2fs/<disk>/ipu_policy
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the in-place-update policy.
+
+What: /sys/fs/f2fs/<disk>/min_ipu_util
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the FS utilization condition for the in-place-update
+ policies.
+
+What: /sys/fs/f2fs/<disk>/max_small_discards
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of small discard commands.
+
+What: /sys/fs/f2fs/<disk>/max_victim_search
+Date: January 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the number of trials to find a victim segment.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index a3fe811bbdb..b8d284975f0 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -120,6 +120,8 @@ active_logs=%u Support configuring the number of active logs. In the
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
does not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
+inline_data Enable the inline data feature: New created small(<~3.4k)
+ files can be written into inode block.
================================================================================
DEBUGFS ENTRIES
@@ -171,6 +173,28 @@ Files in /sys/fs/f2fs/<devname>
conduct checkpoint to reclaim the prefree segments
to free segments. By default, 100 segments, 200MB.
+ max_small_discards This parameter controls the number of discard
+ commands that consist small blocks less than 2MB.
+ The candidates to be discarded are cached until
+ checkpoint is triggered, and issued during the
+ checkpoint. By default, it is disabled with 0.
+
+ ipu_policy This parameter controls the policy of in-place
+ updates in f2fs. There are five policies:
+ 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR,
+ 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL,
+ 4: F2FS_IPU_DISABLE.
+
+ min_ipu_util This parameter controls the threshold to trigger
+ in-place-updates. The number indicates percentage
+ of the filesystem utilization, and used by
+ F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+ max_victim_search This parameter controls the number of trials to
+ find a victim segment when conducting SSR and
+ cleaning operations. The default value is 4096
+ which covers 8GB block address range.
+
================================================================================
USAGE
================================================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 0207c30906a..671047620db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3634,6 +3634,7 @@ W: http://en.wikipedia.org/wiki/F2FS
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
S: Maintained
F: Documentation/filesystems/f2fs.txt
+F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/
F: include/linux/f2fs_fs.h
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 27a0820340b..2e35da12d29 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_F2FS_FS) += f2fs.o
-f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5716e5eb4e8..293d0486a40 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;
*/
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
@@ -50,7 +50,7 @@ repeat:
*/
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
repeat:
page = grab_cache_page(mapping, index);
@@ -61,11 +61,12 @@ repeat:
if (PageUptodate(page))
goto out;
- if (f2fs_readpage(sbi, page, index, READ_SYNC))
+ if (f2fs_submit_page_bio(sbi, page, index,
+ READ_SYNC | REQ_META | REQ_PRIO))
goto repeat;
lock_page(page);
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
@@ -81,13 +82,12 @@ static int f2fs_write_meta_page(struct page *page,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
/* Should not write any meta pages, if any IO error was occurred */
- if (wbc->for_reclaim || sbi->por_doing ||
- is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
+ if (unlikely(sbi->por_doing ||
+ is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ goto redirty_out;
+
+ if (wbc->for_reclaim)
+ goto redirty_out;
wait_on_page_writeback(page);
@@ -95,24 +95,31 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
+
+redirty_out:
+ dec_page_count(sbi, F2FS_DIRTY_META);
+ wbc->pages_skipped++;
+ set_page_dirty(page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- struct block_device *bdev = sbi->sb->s_bdev;
+ int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
long written;
if (wbc->for_kupdate)
return 0;
- if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+ /* collect a number of dirty meta pages and write together */
+ if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
return 0;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
- written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+ written = sync_meta_pages(sbi, META, nrpages);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write -= written;
return 0;
@@ -121,7 +128,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX;
struct pagevec pvec;
long nwritten = 0;
@@ -136,7 +143,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
+ if (unlikely(nr_pages == 0))
break;
for (i = 0; i < nr_pages; i++) {
@@ -149,7 +156,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
unlock_page(page);
break;
}
- if (nwritten++ >= nr_to_write)
+ nwritten++;
+ if (unlikely(nwritten >= nr_to_write))
break;
}
pagevec_release(&pvec);
@@ -157,7 +165,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
}
if (nwritten)
- f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+ f2fs_submit_merged_bio(sbi, type, WRITE);
return nwritten;
}
@@ -186,31 +194,24 @@ const struct address_space_operations f2fs_meta_aops = {
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
- unsigned int max_orphans;
int err = 0;
- /*
- * considering 512 blocks in a segment 5 blocks are needed for cp
- * and log segment summaries. Remaining blocks are used to keep
- * orphan entries with the limitation one reserved segment
- * for cp pack we can have max 1020*507 orphan entries
- */
- max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
- mutex_lock(&sbi->orphan_inode_mutex);
- if (sbi->n_orphans >= max_orphans)
+ spin_lock(&sbi->orphan_inode_lock);
+ if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
+
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -218,27 +219,30 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct list_head *head, *this;
struct orphan_inode_entry *new = NULL, *orphan = NULL;
- mutex_lock(&sbi->orphan_inode_mutex);
+ new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ new->ino = ino;
+
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each(this, head) {
orphan = list_entry(this, struct orphan_inode_entry, list);
- if (orphan->ino == ino)
- goto out;
+ if (orphan->ino == ino) {
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, new);
+ return;
+ }
+
if (orphan->ino > ino)
break;
orphan = NULL;
}
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
/* add new_oentry into list which is sorted by inode number */
if (orphan)
list_add(&new->list, this->prev);
else
list_add_tail(&new->list, head);
-out:
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -246,7 +250,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct list_head *head;
struct orphan_inode_entry *orphan;
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
@@ -257,7 +261,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
break;
}
}
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -270,12 +274,12 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
iput(inode);
}
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blkaddr, i, j;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
- return 0;
+ return;
sbi->por_doing = true;
start_blk = __start_cp_addr(sbi) + 1;
@@ -295,29 +299,39 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
sbi->por_doing = false;
- return 0;
+ return;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
- struct list_head *head, *this, *next;
+ struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL;
- struct page *page = NULL;
unsigned int nentries = 0;
- unsigned short index = 1;
- unsigned short orphan_blocks;
-
- orphan_blocks = (unsigned short)((sbi->n_orphans +
+ unsigned short index;
+ unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+ struct page *page = NULL;
+ struct orphan_inode_entry *orphan = NULL;
+
+ for (index = 0; index < orphan_blocks; index++)
+ grab_meta_page(sbi, start_blk + index);
- mutex_lock(&sbi->orphan_inode_mutex);
+ index = 1;
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
/* loop for each orphan inode entry and write them in Jornal block */
- list_for_each_safe(this, next, head) {
- struct orphan_inode_entry *orphan;
+ list_for_each_entry(orphan, head, list) {
+ if (!page) {
+ page = find_get_page(META_MAPPING(sbi), start_blk++);
+ f2fs_bug_on(!page);
+ orphan_blk =
+ (struct f2fs_orphan_block *)page_address(page);
+ memset(orphan_blk, 0, sizeof(*orphan_blk));
+ f2fs_put_page(page, 0);
+ }
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
if (nentries == F2FS_ORPHANS_PER_BLOCK) {
/*
@@ -331,29 +345,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
set_page_dirty(page);
f2fs_put_page(page, 1);
index++;
- start_blk++;
nentries = 0;
page = NULL;
}
- if (page)
- goto page_exist;
+ }
- page = grab_meta_page(sbi, start_blk);
- orphan_blk = (struct f2fs_orphan_block *)page_address(page);
- memset(orphan_blk, 0, sizeof(*orphan_blk));
-page_exist:
- orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+ if (page) {
+ orphan_blk->blk_addr = cpu_to_le16(index);
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+ orphan_blk->entry_count = cpu_to_le32(nentries);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
}
- if (!page)
- goto end;
- orphan_blk->blk_addr = cpu_to_le16(index);
- orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
- orphan_blk->entry_count = cpu_to_le32(nentries);
- set_page_dirty(page);
- f2fs_put_page(page, 1);
-end:
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -428,7 +433,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
/* The second checkpoint pack should start at the next segment */
- cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+ cp_start_blk_no += ((unsigned long long)1) <<
+ le32_to_cpu(fsb->log_blocks_per_seg);
cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
if (cp1 && cp2) {
@@ -465,7 +471,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode == inode)
+ if (unlikely(entry->inode == inode))
return -EEXIST;
}
list_add_tail(&new->list, head);
@@ -513,8 +519,8 @@ void add_dirty_dir_inode(struct inode *inode)
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
+
+ struct list_head *this, *head;
if (!S_ISDIR(inode->i_mode))
return;
@@ -525,6 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode)
return;
}
+ head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
@@ -546,11 +553,13 @@ void remove_dirty_dir_inode(struct inode *inode)
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
+
+ struct list_head *this, *head;
struct inode *inode = NULL;
spin_lock(&sbi->dir_inode_lock);
+
+ head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
@@ -565,11 +574,13 @@ struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
- struct list_head *head = &sbi->dir_inode_list;
+ struct list_head *head;
struct dir_inode_entry *entry;
struct inode *inode;
retry:
spin_lock(&sbi->dir_inode_lock);
+
+ head = &sbi->dir_inode_list;
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
@@ -585,7 +596,7 @@ retry:
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
- f2fs_submit_bio(sbi, DATA, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
goto retry;
}
@@ -760,8 +771,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* wait for previous submitted node/meta pages writeback */
wait_on_all_pages_writeback(sbi);
- filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
- filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+ filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+ filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
@@ -770,7 +781,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+ if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
clear_prefree_segments(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
@@ -791,9 +802,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
- f2fs_submit_bio(sbi, DATA, true);
- f2fs_submit_bio(sbi, NODE, true);
- f2fs_submit_bio(sbi, META, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_bio(sbi, META, WRITE);
/*
* update checkpoint pack index
@@ -818,20 +829,28 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
void init_orphan_info(struct f2fs_sb_info *sbi)
{
- mutex_init(&sbi->orphan_inode_mutex);
+ spin_lock_init(&sbi->orphan_inode_lock);
INIT_LIST_HEAD(&sbi->orphan_inode_list);
sbi->n_orphans = 0;
+ /*
+ * considering 512 blocks in a segment 8 blocks are needed for cp
+ * and log segment summaries. Remaining blocks are used to keep
+ * orphan entries with the limitation one reserved segment
+ * for cp pack we can have max 1020*504 orphan entries
+ */
+ sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
+ * F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
sizeof(struct orphan_inode_entry), NULL);
- if (unlikely(!orphan_entry_slab))
+ if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry), NULL);
- if (unlikely(!inode_entry_slab)) {
+ if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa3438c571f..0ae55872350 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -24,6 +24,195 @@
#include "segment.h"
#include <trace/events/f2fs.h>
+static void f2fs_read_end_io(struct bio *bio, int err)
+{
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+
+ if (unlikely(!uptodate)) {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ } else {
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+}
+
+static void f2fs_write_end_io(struct bio *bio, int err)
+{
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb);
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+
+ if (unlikely(!uptodate)) {
+ SetPageError(page);
+ set_bit(AS_EIO, &page->mapping->flags);
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+ }
+ end_page_writeback(page);
+ dec_page_count(sbi, F2FS_WRITEBACK);
+ } while (bvec >= bio->bi_io_vec);
+
+ if (bio->bi_private)
+ complete(bio->bi_private);
+
+ if (!get_pages(sbi, F2FS_WRITEBACK) &&
+ !list_empty(&sbi->cp_wait.task_list))
+ wake_up(&sbi->cp_wait);
+
+ bio_put(bio);
+}
+
+/*
+ * Low-level block read/write IO operations.
+ */
+static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+ int npages, bool is_read)
+{
+ struct bio *bio;
+
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+ bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
+
+ return bio;
+}
+
+static void __submit_merged_bio(struct f2fs_bio_info *io)
+{
+ struct f2fs_io_info *fio = &io->fio;
+ int rw;
+
+ if (!io->bio)
+ return;
+
+ rw = fio->rw;
+
+ if (is_read_io(rw)) {
+ trace_f2fs_submit_read_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ submit_bio(rw, io->bio);
+ } else {
+ trace_f2fs_submit_write_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ /*
+ * META_FLUSH is only from the checkpoint procedure, and we
+ * should wait this metadata bio for FS consistency.
+ */
+ if (fio->type == META_FLUSH) {
+ DECLARE_COMPLETION_ONSTACK(wait);
+ io->bio->bi_private = &wait;
+ submit_bio(rw, io->bio);
+ wait_for_completion(&wait);
+ } else {
+ submit_bio(rw, io->bio);
+ }
+ }
+
+ io->bio = NULL;
+}
+
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+ enum page_type type, int rw)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io;
+
+ io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+
+ mutex_lock(&io->io_mutex);
+
+ /* change META to META_FLUSH in the checkpoint procedure */
+ if (type >= META_FLUSH) {
+ io->fio.type = META_FLUSH;
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ }
+ __submit_merged_bio(io);
+ mutex_unlock(&io->io_mutex);
+}
+
+/*
+ * Fill the locked page with data located in the block address.
+ * Return unlocked page.
+ */
+int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, int rw)
+{
+ struct bio *bio;
+
+ trace_f2fs_submit_page_bio(page, blk_addr, rw);
+
+ /* Allocate a new bio */
+ bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
+
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ bio_put(bio);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
+
+ submit_bio(rw, bio);
+ return 0;
+}
+
+void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, struct f2fs_io_info *fio)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
+ struct f2fs_bio_info *io;
+ bool is_read = is_read_io(fio->rw);
+
+ io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+
+ verify_block_addr(sbi, blk_addr);
+
+ mutex_lock(&io->io_mutex);
+
+ if (!is_read)
+ inc_page_count(sbi, F2FS_WRITEBACK);
+
+ if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
+ io->fio.rw != fio->rw))
+ __submit_merged_bio(io);
+alloc_new:
+ if (io->bio == NULL) {
+ int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
+ io->fio = *fio;
+ }
+
+ if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ PAGE_CACHE_SIZE) {
+ __submit_merged_bio(io);
+ goto alloc_new;
+ }
+
+ io->last_block_in_bio = blk_addr;
+
+ mutex_unlock(&io->io_mutex);
+ trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
+}
+
/*
* Lock ordering for the change of data block address:
* ->data_page
@@ -37,7 +226,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
- f2fs_wait_on_page_writeback(node_page, NODE, false);
+ f2fs_wait_on_page_writeback(node_page, NODE);
rn = F2FS_NODE(node_page);
@@ -51,19 +240,39 @@ int reserve_new_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
- if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
- if (!inc_valid_block_count(sbi, dn->inode, 1))
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
+ mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
}
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
+{
+ bool need_put = dn->inode_page ? false : true;
+ int err;
+
+ /* if inode_page exists, index should be zero */
+ f2fs_bug_on(!need_put && index);
+
+ err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ if (err)
+ return err;
+
+ if (dn->data_blkaddr == NULL_ADDR)
+ err = reserve_new_block(dn);
+ if (err || need_put)
+ f2fs_put_dnode(dn);
+ return err;
+}
+
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
struct buffer_head *bh_result)
{
@@ -71,6 +280,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return 0;
+
read_lock(&fi->ext.ext_lock);
if (fi->ext.len == 0) {
read_unlock(&fi->ext.ext_lock);
@@ -109,6 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
+ int need_update = true;
f2fs_bug_on(blk_addr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -117,6 +330,9 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
write_lock(&fi->ext.ext_lock);
start_fofs = fi->ext.fofs;
@@ -163,14 +379,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
- goto end_update;
+ } else {
+ need_update = false;
}
- write_unlock(&fi->ext.ext_lock);
- return;
+ /* Finally, if the extent is very fragmented, let's drop the cache. */
+ if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
+ fi->ext.len = 0;
+ set_inode_flag(fi, FI_NO_EXTENT);
+ need_update = true;
+ }
end_update:
write_unlock(&fi->ext.ext_lock);
- sync_inode_page(dn);
+ if (need_update)
+ sync_inode_page(dn);
+ return;
}
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
@@ -196,7 +419,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return ERR_PTR(-ENOENT);
/* By fallocate(), there is no cached page, but with NEW_ADDR */
- if (dn.data_blkaddr == NEW_ADDR)
+ if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -208,11 +431,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return page;
}
- err = f2fs_readpage(sbi, page, dn.data_blkaddr,
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
sync ? READ_SYNC : READA);
+ if (err)
+ return ERR_PTR(err);
+
if (sync) {
wait_on_page_locked(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 0);
return ERR_PTR(-EIO);
}
@@ -246,7 +472,7 @@ repeat:
}
f2fs_put_dnode(&dn);
- if (dn.data_blkaddr == NULL_ADDR) {
+ if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
}
@@ -266,16 +492,16 @@ repeat:
return page;
}
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
if (err)
return ERR_PTR(err);
lock_page(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
@@ -286,12 +512,12 @@ repeat:
* Caller ensures that this data page is never allocated.
* A new zero-filled data page is allocated in the page cache.
*
- * Also, caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
- * Note that, npage is set only by make_empty_dir.
+ * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f