aboutsummaryrefslogtreecommitdiff
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/Kconfig8
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/acl.c182
-rw-r--r--fs/f2fs/acl.h14
-rw-r--r--fs/f2fs/checkpoint.c519
-rw-r--r--fs/f2fs/data.c750
-rw-r--r--fs/f2fs/debug.c63
-rw-r--r--fs/f2fs/dir.c146
-rw-r--r--fs/f2fs/f2fs.h448
-rw-r--r--fs/f2fs/file.c301
-rw-r--r--fs/f2fs/gc.c69
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/inline.c250
-rw-r--r--fs/f2fs/inode.c121
-rw-r--r--fs/f2fs/namei.c80
-rw-r--r--fs/f2fs/node.c712
-rw-r--r--fs/f2fs/node.h44
-rw-r--r--fs/f2fs/recovery.c164
-rw-r--r--fs/f2fs/segment.c846
-rw-r--r--fs/f2fs/segment.h190
-rw-r--r--fs/f2fs/super.c326
-rw-r--r--fs/f2fs/xattr.c140
-rw-r--r--fs/f2fs/xattr.h10
23 files changed, 3551 insertions, 1836 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index e06e0995e00..214fe1054fc 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -63,3 +63,11 @@ config F2FS_FS_SECURITY
the extended attribute support in advance.
If you are not using a security module, say N.
+
+config F2FS_CHECK_FS
+ bool "F2FS consistency checking feature"
+ depends on F2FS_FS
+ help
+ Enables BUG_ONs which check the file system consistency in runtime.
+
+ If you want to improve the performance, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 27a0820340b..2e35da12d29 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_F2FS_FS) += f2fs.o
-f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b7826ec1b47..dbe2141d10a 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -17,9 +17,6 @@
#include "xattr.h"
#include "acl.h"
-#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
- (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
-
static inline size_t f2fs_acl_size(int count)
{
if (count <= 4) {
@@ -167,25 +164,17 @@ fail:
struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
void *value = NULL;
struct posix_acl *acl;
int retval;
- if (!test_opt(sbi, POSIX_ACL))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
if (type == ACL_TYPE_ACCESS)
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
if (retval > 0) {
- value = kmalloc(retval, GFP_KERNEL);
+ value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -205,19 +194,20 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
-static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int __f2fs_set_acl(struct inode *inode, int type,
+ struct posix_acl *acl, struct page *ipage)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
int name_index;
void *value = NULL;
size_t size = 0;
int error;
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
+ if (acl) {
+ error = posix_acl_valid(acl);
+ if (error < 0)
+ return error;
+ }
switch (type) {
case ACL_TYPE_ACCESS:
@@ -250,7 +240,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
}
- error = f2fs_setxattr(inode, name_index, "", value, size, NULL);
+ error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
kfree(value);
if (!error)
@@ -260,153 +250,31 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir)
+int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- struct posix_acl *acl = NULL;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- int error = 0;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (test_opt(sbi, POSIX_ACL)) {
- acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- if (!acl)
- inode->i_mode &= ~current_umask();
- }
-
- if (test_opt(sbi, POSIX_ACL) && acl) {
-
- if (S_ISDIR(inode->i_mode)) {
- error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
- if (error)
- goto cleanup;
- }
- error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
- if (error < 0)
- return error;
- if (error > 0)
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- }
-cleanup:
- posix_acl_release(acl);
- return error;
+ return __f2fs_set_acl(inode, type, acl, NULL);
}
-int f2fs_acl_chmod(struct inode *inode)
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct posix_acl *acl;
- int error;
- umode_t mode = get_inode_mode(inode);
-
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
- if (S_ISLNK(mode))
- return -EOPNOTSUPP;
-
- acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
+ struct posix_acl *default_acl, *acl;
+ int error = 0;
- error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
+ error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (error)
return error;
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- posix_acl_release(acl);
- return error;
-}
-
-static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- const char *xname = POSIX_ACL_XATTR_DEFAULT;
- size_t size;
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
-
- if (type == ACL_TYPE_ACCESS)
- xname = POSIX_ACL_XATTR_ACCESS;
-
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
-}
-
-static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
- void *buffer, size_t size, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct posix_acl *acl;
- int error;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(sbi, POSIX_ACL))
- return -EOPNOTSUPP;
-
- acl = f2fs_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (!acl)
- return -ENODATA;
- error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl = NULL;
- int error;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(sbi, POSIX_ACL))
- return -EOPNOTSUPP;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_valid(acl);
- if (error)
- goto release_and_out;
- }
- } else {
- acl = NULL;
+ if (default_acl) {
+ error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
+ ipage);
+ posix_acl_release(default_acl);
+ }
+ if (acl) {
+ if (error)
+ error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
+ ipage);
+ posix_acl_release(acl);
}
- error = f2fs_set_acl(inode, type, acl);
-
-release_and_out:
- posix_acl_release(acl);
return error;
}
-
-const struct xattr_handler f2fs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .list = f2fs_xattr_list_acl,
- .get = f2fs_xattr_get_acl,
- .set = f2fs_xattr_set_acl,
-};
-
-const struct xattr_handler f2fs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = f2fs_xattr_list_acl,
- .get = f2fs_xattr_get_acl,
- .set = f2fs_xattr_set_acl,
-};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 80f43067441..e0864651cdc 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -36,20 +36,16 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
-extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
-extern int f2fs_acl_chmod(struct inode *inode);
-extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
+extern struct posix_acl *f2fs_get_acl(struct inode *, int);
+extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL
-static inline int f2fs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-
-static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
+ struct page *page)
{
return 0;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bb312201ca9..0b4710c1d37 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;
*/
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
@@ -38,9 +38,7 @@ repeat:
cond_resched();
goto repeat;
}
-
- /* We wait writeback only inside grab_meta_page() */
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, META);
SetPageUptodate(page);
return page;
}
@@ -50,7 +48,7 @@ repeat:
*/
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
repeat:
page = grab_cache_page(mapping, index);
@@ -61,67 +59,154 @@ repeat:
if (PageUptodate(page))
goto out;
- if (f2fs_readpage(sbi, page, index, READ_SYNC))
+ if (f2fs_submit_page_bio(sbi, page, index,
+ READ_SYNC | REQ_META | REQ_PRIO))
goto repeat;
lock_page(page);
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
out:
- mark_page_accessed(page);
return page;
}
+static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+{
+ switch (type) {
+ case META_NAT:
+ return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+ case META_SIT:
+ return SIT_BLK_CNT(sbi);
+ case META_SSA:
+ case META_CP:
+ return 0;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Readahead CP/NAT/SIT/SSA pages
+ */
+int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
+{
+ block_t prev_blk_addr = 0;
+ struct page *page;
+ int blkno = start;
+ int max_blks = get_max_meta_blks(sbi, type);
+
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (; nrpages-- > 0; blkno++) {
+ block_t blk_addr;
+
+ switch (type) {
+ case META_NAT:
+ /* get nat block addr */
+ if (unlikely(blkno >= max_blks))
+ blkno = 0;
+ blk_addr = current_nat_addr(sbi,
+ blkno * NAT_ENTRY_PER_BLOCK);
+ break;
+ case META_SIT:
+ /* get sit block addr */
+ if (unlikely(blkno >= max_blks))
+ goto out;
+ blk_addr = current_sit_addr(sbi,
+ blkno * SIT_ENTRY_PER_BLOCK);
+ if (blkno != start && prev_blk_addr + 1 != blk_addr)
+ goto out;
+ prev_blk_addr = blk_addr;
+ break;
+ case META_SSA:
+ case META_CP:
+ /* get ssa/cp block addr */
+ blk_addr = blkno;
+ break;
+ default:
+ BUG();
+ }
+
+ page = grab_cache_page(META_MAPPING(sbi), blk_addr);
+ if (!page)
+ continue;
+ if (PageUptodate(page)) {
+ f2fs_put_page(page, 1);
+ continue;
+ }
+
+ f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
+ f2fs_put_page(page, 0);
+ }
+out:
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return blkno - start;
+}
+
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- /* Should not write any meta pages, if any IO error was occurred */
- if (wbc->for_reclaim ||
- is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
+ trace_f2fs_writepage(page, META);
- wait_on_page_writeback(page);
+ if (unlikely(sbi->por_doing))
+ goto redirty_out;
+ if (wbc->for_reclaim)
+ goto redirty_out;
+ /* Should not write any meta pages, if any IO error was occurred */
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ goto no_write;
+
+ f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
+no_write:
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- struct block_device *bdev = sbi->sb->s_bdev;
- long written;
+ long diff, written;
- if (wbc->for_kupdate)
- return 0;
+ trace_f2fs_writepages(mapping->host, wbc, META);
- if (get_pages(sbi, F2FS_DIRTY_META) == 0)
- return 0;
+ /* collect a number of dirty meta pages and write together */
+ if (wbc->for_kupdate ||
+ get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
- written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+ diff = nr_pages_to_write(sbi, META, wbc);
+ written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
- wbc->nr_to_write -= written;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
return 0;
}
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX;
struct pagevec pvec;
long nwritten = 0;
@@ -136,20 +221,33 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
+ if (unlikely(nr_pages == 0))
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+
lock_page(page);
- BUG_ON(page->mapping != mapping);
- BUG_ON(!PageDirty(page));
- clear_page_dirty_for_io(page);
+
+ if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
+
if (f2fs_write_meta_page(page, &wbc)) {
unlock_page(page);
break;
}
- if (nwritten++ >= nr_to_write)
+ nwritten++;
+ if (unlikely(nwritten >= nr_to_write))
break;
}
pagevec_release(&pvec);
@@ -157,7 +255,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
}
if (nwritten)
- f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+ f2fs_submit_merged_bio(sbi, type, WRITE);
return nwritten;
}
@@ -167,6 +265,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+ trace_f2fs_set_page_dirty(page, META);
+
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -184,62 +284,50 @@ const struct address_space_operations f2fs_meta_aops = {
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
- unsigned int max_orphans;
int err = 0;
- /*
- * considering 512 blocks in a segment 5 blocks are needed for cp
- * and log segment summaries. Remaining blocks are used to keep
- * orphan entries with the limitation one reserved segment
- * for cp pack we can have max 1020*507 orphan entries
- */
- max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
- mutex_lock(&sbi->orphan_inode_mutex);
- if (sbi->n_orphans >= max_orphans)
+ spin_lock(&sbi->orphan_inode_lock);
+ if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
+
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
+ f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head, *this;
- struct orphan_inode_entry *new = NULL, *orphan = NULL;
+ struct list_head *head;
+ struct orphan_inode_entry *new, *orphan;
- mutex_lock(&sbi->orphan_inode_mutex);
+ new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ new->ino = ino;
+
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
- list_for_each(this, head) {
- orphan = list_entry(this, struct orphan_inode_entry, list);
- if (orphan->ino == ino)
- goto out;
+ list_for_each_entry(orphan, head, list) {
+ if (orphan->ino == ino) {
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, new);
+ return;
+ }
+
if (orphan->ino > ino)
break;
- orphan = NULL;
- }
-retry:
- new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- if (!new) {
- cond_resched();
- goto retry;
}
- new->ino = ino;
- /* add new_oentry into list which is sorted by inode number */
- if (orphan)
- list_add(&new->list, this->prev);
- else
- list_add_tail(&new->list, head);
-out:
- mutex_unlock(&sbi->orphan_inode_mutex);
+ /* add new orphan entry into list which is sorted by inode number */
+ list_add_tail(&new->list, &orphan->list);
+ spin_unlock(&sbi->orphan_inode_lock);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -247,40 +335,46 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct list_head *head;
struct orphan_inode_entry *orphan;
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
list_del(&orphan->list);
- kmem_cache_free(orphan_entry_slab, orphan);
+ f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- break;
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, orphan);
+ return;
}
}
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode = f2fs_iget(sbi->sb, ino);
- BUG_ON(IS_ERR(inode));
+ f2fs_bug_on(IS_ERR(inode));
clear_nlink(inode);
/* truncate all the data during iput */
iput(inode);
}
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blkaddr, i, j;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
- return 0;
+ return;
- sbi->por_doing = 1;
- start_blk = __start_cp_addr(sbi) + 1;
+ sbi->por_doing = true;
+
+ start_blk = __start_cp_addr(sbi) + 1 +
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
orphan_blkaddr = __start_sum_addr(sbi) - 1;
+ ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+
for (i = 0; i < orphan_blkaddr; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
struct f2fs_orphan_block *orphan_blk;
@@ -294,30 +388,40 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
- sbi->por_doing = 0;
- return 0;
+ sbi->por_doing = false;
+ return;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
- struct list_head *head, *this, *next;
+ struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL;
- struct page *page = NULL;
unsigned int nentries = 0;
- unsigned short index = 1;
- unsigned short orphan_blocks;
-
- orphan_blocks = (unsigned short)((sbi->n_orphans +
+ unsigned short index;
+ unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+ struct page *page = NULL;
+ struct orphan_inode_entry *orphan = NULL;
- mutex_lock(&sbi->orphan_inode_mutex);
+ for (index = 0; index < orphan_blocks; index++)
+ grab_meta_page(sbi, start_blk + index);
+
+ index = 1;
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
/* loop for each orphan inode entry and write them in Jornal block */
- list_for_each_safe(this, next, head) {
- struct orphan_inode_entry *orphan;
+ list_for_each_entry(orphan, head, list) {
+ if (!page) {
+ page = find_get_page(META_MAPPING(sbi), start_blk++);
+ f2fs_bug_on(!page);
+ orphan_blk =
+ (struct f2fs_orphan_block *)page_address(page);
+ memset(orphan_blk, 0, sizeof(*orphan_blk));
+ f2fs_put_page(page, 0);
+ }
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
if (nentries == F2FS_ORPHANS_PER_BLOCK) {
/*
@@ -331,29 +435,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
set_page_dirty(page);
f2fs_put_page(page, 1);
index++;
- start_blk++;
nentries = 0;
page = NULL;
}
- if (page)
- goto page_exist;
+ }
- page = grab_meta_page(sbi, start_blk);
- orphan_blk = (struct f2fs_orphan_block *)page_address(page);
- memset(orphan_blk, 0, sizeof(*orphan_blk));
-page_exist:
- orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+ if (page) {
+ orphan_blk->blk_addr = cpu_to_le16(index);
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+ orphan_blk->entry_count = cpu_to_le32(nentries);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
}
- if (!page)
- goto end;
-
- orphan_blk->blk_addr = cpu_to_le16(index);
- orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
- orphan_blk->entry_count = cpu_to_le32(nentries);
- set_page_dirty(page);
- f2fs_put_page(page, 1);
-end:
- mutex_unlock(&sbi->orphan_inode_mutex);
+
+ spin_unlock(&sbi->orphan_inode_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -416,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned long blk_size = sbi->blocksize;
unsigned long long cp1_version = 0, cp2_version = 0;
unsigned long long cp_start_blk_no;
+ unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ block_t cp_blk_no;
+ int i;
- sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+ sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -428,7 +526,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
/* The second checkpoint pack should start at the next segment */
- cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+ cp_start_blk_no += ((unsigned long long)1) <<
+ le32_to_cpu(fsb->log_blocks_per_seg);
cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
if (cp1 && cp2) {
@@ -447,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
+ if (cp_blks <= 1)
+ goto done;
+
+ cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+ if (cur_page == cp2)
+ cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+ for (i = 1; i < cp_blks; i++) {
+ void *sit_bitmap_ptr;
+ unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+ cur_page = get_meta_page(sbi, cp_blk_no + i);
+ sit_bitmap_ptr = page_address(cur_page);
+ memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+ f2fs_put_page(cur_page, 1);
+ }
+done:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
return 0;
@@ -459,19 +575,14 @@ fail_no_cp:
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
-
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode == inode)
- return -EEXIST;
- }
- list_add_tail(&new->list, head);
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->n_dirty_dirs++;
-#endif
+
+ if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
+ return -EEXIST;
+
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ F2FS_I(inode)->dirty_dir = new;
+ list_add_tail(&new->list, &sbi->dir_inode_list);
+ stat_inc_dirty_dir(sbi);
return 0;
}
@@ -479,75 +590,65 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dir_inode_entry *new;
+ int ret = 0;
if (!S_ISDIR(inode->i_mode))
return;
-retry:
- new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- if (!new) {
- cond_resched();
- goto retry;
- }
+
+ new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
- if (__add_dirty_inode(inode, new))
- kmem_cache_free(inode_entry_slab, new);
-
- inc_page_count(sbi, F2FS_DIRTY_DENTS);
+ ret = __add_dirty_inode(inode, new);
inode_inc_dirty_dents(inode);
SetPagePrivate(page);
spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
}
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct dir_inode_entry *new;
-retry:
- new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- if (!new) {
- cond_resched();
- goto retry;
- }
+ struct dir_inode_entry *new =
+ f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ int ret = 0;
+
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
- if (__add_dirty_inode(inode, new))
- kmem_cache_free(inode_entry_slab, new);
+ ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
}
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
+ struct dir_inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
- if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
+ if (get_dirty_dents(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
spin_unlock(&sbi->dir_inode_lock);
return;
}
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode == inode) {
- list_del(&entry->list);
- kmem_cache_free(inode_entry_slab, entry);
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->n_dirty_dirs--;
-#endif
- break;
- }
- }
+ entry = F2FS_I(inode)->dirty_dir;
+ list_del(&entry->list);
+ F2FS_I(inode)->dirty_dir = NULL;
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
+ kmem_cache_free(inode_entry_slab, entry);
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
@@ -556,32 +657,15 @@ void remove_dirty_dir_inode(struct inode *inode)
}
}
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
-{
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
- struct inode *inode = NULL;
-
- spin_lock(&sbi->dir_inode_lock);
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode->i_ino == ino) {
- inode = entry->inode;
- break;
- }
- }
- spin_unlock(&sbi->dir_inode_lock);
- return inode;
-}
-
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
- struct list_head *head = &sbi->dir_inode_list;
+ struct list_head *head;
struct dir_inode_entry *entry;
struct inode *inode;
retry:
spin_lock(&sbi->dir_inode_lock);
+
+ head = &sbi->dir_inode_list;
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
@@ -590,14 +674,14 @@ retry:
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
if (inode) {
- filemap_flush(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
iput(inode);
} else {
/*
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
- f2fs_submit_bio(sbi, DATA, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
goto retry;
}
@@ -617,11 +701,10 @@ static void block_operations(struct f2fs_sb_info *sbi)
blk_start_plug(&plug);
retry_flush_dents:
- mutex_lock_all(sbi);
-
+ f2fs_lock_all(sbi);
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
- mutex_unlock_all(sbi);
+ f2fs_unlock_all(sbi);
sync_dirty_dir_inodes(sbi);
goto retry_flush_dents;
}
@@ -644,7 +727,22 @@ retry_flush_nodes:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
mutex_unlock(&sbi->node_write);
- mutex_unlock_all(sbi);
+ f2fs_unlock_all(sbi);
+}
+
+static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+{
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (!get_pages(sbi, F2FS_WRITEBACK))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&sbi->cp_wait, &wait);
}
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -657,6 +755,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
__u32 crc32 = 0;
void *kaddr;
int i;
+ int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
+ /*
+ * This avoids to conduct wrong roll-forward operations and uses
+ * metapages, so should be called prior to sync_meta_pages below.
+ */
+ discard_next_dnode(sbi);
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -701,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
/ F2FS_ORPHANS_PER_BLOCK;
- ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+ ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+ orphan_blocks);
if (is_umount) {
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks + NR_CURSEG_NODE_TYPE);
} else {
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks);
}
if (sbi->n_orphans)
@@ -736,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
+ for (i = 1; i < 1 + cp_payload_blks; i++) {
+ cp_page = grab_meta_page(sbi, start_blk++);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+ (1 << sbi->log_blocksize));
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+ }
+
if (sbi->n_orphans) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
@@ -756,11 +873,10 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
f2fs_put_page(cp_page, 1);
/* wait for previous submitted node/meta pages writeback */
- while (get_pages(sbi, F2FS_WRITEBACK))
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ wait_on_all_pages_writeback(sbi);
- filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
- filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+ filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+ filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
@@ -769,7 +885,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+ if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
clear_prefree_segments(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
@@ -790,9 +906,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
- f2fs_submit_bio(sbi, DATA, true);
- f2fs_submit_bio(sbi, NODE, true);
- f2fs_submit_bio(sbi, META, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_bio(sbi, META, WRITE);
/*
* update checkpoint pack index
@@ -812,25 +928,34 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
unblock_operations(sbi);
mutex_unlock(&sbi->cp_mutex);
+ stat_inc_cp_count(sbi->stat_info);
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
void init_orphan_info(struct f2fs_sb_info *sbi)
{
- mutex_init(&sbi->orphan_inode_mutex);
+ spin_lock_init(&sbi->orphan_inode_lock);
INIT_LIST_HEAD(&sbi->orphan_inode_list);
sbi->n_orphans = 0;
+ /*
+ * considering 512 blocks in a segment 8 blocks are needed for cp
+ * and log segment summaries. Remaining blocks are used to keep
+ * orphan entries with the limitation one reserved segment
+ * for cp pack we can have max 1020*504 orphan entries
+ */
+ sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
+ * F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry), NULL);
- if (unlikely(!orphan_entry_slab))
+ sizeof(struct orphan_inode_entry));
+ if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
- sizeof(struct dir_inode_entry), NULL);
- if (unlikely(!inode_entry_slab)) {
+ sizeof(struct dir_inode_entry));
+ if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 941f9b9ca3a..f8cf619edb5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -24,6 +24,190 @@
#include "segment.h"
#include <trace/events/f2fs.h>
+static void f2fs_read_end_io(struct bio *bio, int err)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ }
+ bio_put(bio);
+}
+
+static void f2fs_write_end_io(struct bio *bio, int err)
+{
+ struct f2fs_sb_info *sbi = bio->bi_private;
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ if (unlikely(err)) {
+ SetPageError(page);
+ set_bit(AS_EIO, &page->mapping->flags);
+ f2fs_stop_checkpoint(sbi);
+ }
+ end_page_writeback(page);
+ dec_page_count(sbi, F2FS_WRITEBACK);
+ }
+
+ if (sbi->wait_io) {
+ complete(sbi->wait_io);
+ sbi->wait_io = NULL;
+ }
+
+ if (!get_pages(sbi, F2FS_WRITEBACK) &&
+ !list_empty(&sbi->cp_wait.task_list))
+ wake_up(&sbi->cp_wait);
+
+ bio_put(bio);
+}
+
+/*
+ * Low-level block read/write IO operations.
+ */
+static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+ int npages, bool is_read)
+{
+ struct bio *bio;
+
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+ bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
+ bio->bi_private = sbi;
+
+ return bio;
+}
+
+static void __submit_merged_bio(struct f2fs_bio_info *io)
+{
+ struct f2fs_io_info *fio = &io->fio;
+ int rw;
+
+ if (!io->bio)
+ return;
+
+ rw = fio->rw;
+
+ if (is_read_io(rw)) {
+ trace_f2fs_submit_read_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ submit_bio(rw, io->bio);
+ } else {
+ trace_f2fs_submit_write_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ /*
+ * META_FLUSH is only from the checkpoint procedure, and we
+ * should wait this metadata bio for FS consistency.
+ */
+ if (fio->type == META_FLUSH) {
+ DECLARE_COMPLETION_ONSTACK(wait);
+ io->sbi->wait_io = &wait;
+ submit_bio(rw, io->bio);
+ wait_for_completion(&wait);
+ } else {
+ submit_bio(rw, io->bio);
+ }
+ }
+
+ io->bio = NULL;
+}
+
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+ enum page_type type, int rw)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io;
+
+ io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+
+ down_write(&io->io_rwsem);
+
+ /* change META to META_FLUSH in the checkpoint procedure */
+ if (type >= META_FLUSH) {
+ io->fio.type = META_FLUSH;
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ }
+ __submit_merged_bio(io);
+ up_write(&io->io_rwsem);
+}
+
+/*
+ * Fill the locked page with data located in the block address.
+ * Return unlocked page.
+ */
+int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, int rw)
+{
+ struct bio *bio;
+
+ trace_f2fs_submit_page_bio(page, blk_addr, rw);
+
+ /* Allocate a new bio */
+ bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
+
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ bio_put(bio);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
+
+ submit_bio(rw, bio);
+ return 0;
+}
+
+void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, struct f2fs_io_info *fio)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
+ struct f2fs_bio_info *io;
+ bool is_read = is_read_io(fio->rw);
+
+ io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+
+ verify_block_addr(sbi, blk_addr);
+
+ down_write(&io->io_rwsem);
+
+ if (!is_read)
+ inc_page_count(sbi, F2FS_WRITEBACK);
+
+ if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
+ io->fio.rw != fio->rw))
+ __submit_merged_bio(io);
+alloc_new:
+ if (io->bio == NULL) {
+ int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
+ io->fio = *fio;
+ }
+
+ if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ PAGE_CACHE_SIZE) {
+ __submit_merged_bio(io);
+ goto alloc_new;
+ }
+
+ io->last_block_in_bio = blk_addr;
+
+ up_write(&io->io_rwsem);
+ trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
+}
+
/*
* Lock ordering for the change of data block address:
* ->data_page
@@ -37,7 +221,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
- f2fs_wait_on_page_writeback(node_page, NODE, false);
+ f2fs_wait_on_page_writeback(node_page, NODE);
rn = F2FS_NODE(node_page);
@@ -51,38 +235,57 @@ int reserve_new_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
- if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
- if (!inc_valid_block_count(sbi, dn->inode, 1))
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
+ mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
}
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
+{
+ bool need_put = dn->inode_page ? false : true;
+ int err;
+
+ /* if inode_page exists, index should be zero */
+ f2fs_bug_on(!need_put && index);
+
+ err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ if (err)
+ return err;
+
+ if (dn->data_blkaddr == NULL_ADDR)
+ err = reserve_new_block(dn);
+ if (err || need_put)
+ f2fs_put_dnode(dn);
+ return err;
+}
+
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
struct buffer_head *bh_result)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
-#ifdef CONFIG_F2FS_STAT_FS
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-#endif
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return 0;
+
read_lock(&fi->ext.ext_lock);
if (fi->ext.len == 0) {
read_unlock(&fi->ext.ext_lock);
return 0;
}
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->total_hit_ext++;
-#endif
+ stat_inc_total_hit(inode->i_sb);
+
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
start_blkaddr = fi->ext.blk_addr;
@@ -100,9 +303,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
else
bh_result->b_size = UINT_MAX;
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->read_hit_ext++;
-#endif
+ stat_inc_read_hit(inode->i_sb);
read_unlock(&fi->ext.ext_lock);
return 1;
}
@@ -115,14 +316,18 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
+ int need_update = true;
- BUG_ON(blk_addr == NEW_ADDR);
+ f2fs_bug_on(blk_addr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
write_lock(&fi->ext.ext_lock);
start_fofs = fi->ext.fofs;
@@ -169,14 +374,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
- goto end_update;
+ } else {
+ need_update = false;
}
- write_unlock(&fi->ext.ext_lock);
- return;
+ /* Finally, if the extent is very fragmented, let's drop the cache. */
+ if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
+ fi->ext.len = 0;
+ set_inode_flag(fi, FI_NO_EXTENT);
+ need_update = true;
+ }
end_update:
write_unlock(&fi->ext.ext_lock);
- sync_inode_page(dn);
+ if (need_update)
+ sync_inode_page(dn);
+ return;
}
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
@@ -202,10 +414,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return ERR_PTR(-ENOENT);
/* By fallocate(), there is no cached page, but with NEW_ADDR */
- if (dn.data_blkaddr == NEW_ADDR)
+ if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
- page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -214,11 +426,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return page;
}
- err = f2fs_readpage(sbi, page, dn.data_blkaddr,
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
sync ? READ_SYNC : READA);
+ if (err)
+ return ERR_PTR(err);
+
if (sync) {
wait_on_page_locked(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 0);
return ERR_PTR(-EIO);
}
@@ -240,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
int err;
repeat:
- page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -252,7 +467,7 @@ repeat:
}
f2fs_put_dnode(&dn);
- if (dn.data_blkaddr == NULL_ADDR) {
+ if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
}
@@ -272,16 +487,16 @@ repeat:
return page;
}
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
if (err)
return ERR_PTR(err);
lock_page(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
@@ -292,12 +507,12 @@ repeat:
* Caller ensures that this data page is never allocated.
* A new zero-filled data page is allocated in the page cache.
*
- * Also, caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
- * Note that, npage is set only by make_empty_dir.
+ * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ * Note that, ipage is set only by make_empty_dir.
*/
struct page *get_new_data_page(struct inode *inode,
- struct page *npage, pgoff_t index, bool new_i_size)
+ struct page *ipage, pgoff_t index, bool new_i_size)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
@@ -305,24 +520,16 @@ struct page *get_new_data_page(struct inode *inode,
struct dnode_of_data dn;
int err;
- set_new_dnode(&dn, inode, npage, npage, 0);
- err = get_dnode_of_data(&dn, index, ALLOC_NODE);
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, index);
if (err)
return ERR_PTR(err);
-
- if (dn.data_blkaddr == NULL_ADDR) {
- if (reserve_new_block(&dn)) {
- if (!npage)
- f2fs_put_dnode(&dn);
- return ERR_PTR(-ENOSPC);
- }
- }
- if (!npage)
- f2fs_put_dnode(&dn);
repeat:
page = grab_cache_page(mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ if (!page) {
+ err = -ENOMEM;
+ goto put_err;
+ }
if (PageUptodate(page))
return page;
@@ -331,15 +538,18 @@ repeat:
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
if (err)
- return ERR_PTR(err);
+ goto put_err;
+
lock_page(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
+ err = -EIO;
+ goto put_err;
}
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
@@ -350,140 +560,206 @@ repeat:
i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
/* Only the directory inode sets new_i_size */
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
- mark_inode_dirty_sync(inode);
}
return page;
-}
-
-static void read_end_io(struct bio *bio, int err)
-{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- do {
- struct page *page = bvec->bv_page;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
- } while (bvec >= bio->bi_io_vec);
- bio_put(bio);
+put_err:
+ f2fs_put_dnode(&dn);
+ return ERR_PTR(err);
}
-/*
- * Fill the locked page with data located in the block address.
- * Return unlocked page.
- */
-int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, int type)
+static int __allocate_data_block(struct dnode_of_data *dn)
{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct bio *bio;
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+ struct f2fs_summary sum;
+ block_t new_blkaddr;
+ struct node_info ni;
+ int type;
- trace_f2fs_readpage(page, blk_addr, type);
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+ return -EPERM;
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+ return -ENOSPC;
- down_read(&sbi->bio_sem);
+ __set_data_blkaddr(dn, NEW_ADDR);
+ dn->data_blkaddr = NEW_ADDR;
- /* Allocate a new bio */
- bio = f2fs_bio_alloc(bdev, 1);
+ get_node_info(sbi, dn->nid, &ni);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- /* Initialize the bio */
- bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
- bio->bi_end_io = read_end_io;
+ type = CURSEG_WARM_DATA;
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
- bio_put(bio);
- up_read(&sbi->bio_sem);
- f2fs_put_page(page, 1);
- return -EFAULT;
- }
+ allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
+
+ /* direct IO doesn't use extent cache to maximize the performance */
+ set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+ update_extent_cache(new_blkaddr, dn);
+ clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
- submit_bio(type, bio);
- up_read(&sbi->bio_sem);
+ dn->data_blkaddr = new_blkaddr;
return 0;
}
/*
- * This function should be used by the data read flow only where it
- * does not check the "create" flag that indicates block allocation.
- * The reason for this special functionality is to exploit VFS readahead
- * mechanism.
+ * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * If original data blocks are allocated, then give them to blockdev.
+ * Otherwise,
+ * a. preallocate requested block addresses
+ * b. do not use extent cache for better performance
+ * c. give the block addresses to blockdev
*/
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create, bool fiemap)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
unsigned maxblocks = bh_result->b_size >> blkbits;
struct dnode_of_data dn;
- pgoff_t pgofs;
- int err;
+ int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+ pgoff_t pgofs, end_offset;
+ int err = 0, ofs = 1;
+ bool allocated = false;
/* Get the page offset from the block offset(iblock) */
pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
- if (check_extent_cache(inode, pgofs, bh_result)) {
- trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
- return 0;
- }
+ if (check_extent_cache(inode, pgofs, bh_result))
+ goto out;
+
+ if (create)
+ f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
- trace_f2fs_get_data_block(inode, iblock, bh_result, err);
- return (err == -ENOENT) ? 0 : err;
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
}
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
- /* It does not support data allocation */
- BUG_ON(create);
+ if (dn.data_blkaddr != NULL_ADDR) {
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else if (create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto put_out;
+ allocated = true;
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else {
+ goto put_out;
+ }
- if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
- int i;
- unsigned int end_offset;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ bh_result->b_size = (((size_t)1) << blkbits);
+ dn.ofs_in_node++;
+ pgofs++;
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE(F2FS_I(inode)) :
- ADDRS_PER_BLOCK;
+get_next:
+ if (dn.ofs_in_node >= end_offset) {
+ if (allocated)
+ sync_inode_page(&dn);
+ allocated = false;
+ f2fs_put_dnode(&dn);
- clear_buffer_new(bh_result);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, mode);
+ if (err) {
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
+ }
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ }
+ if (maxblocks > (bh_result->b_size >> blkbits)) {
+ block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ if (blkaddr == NULL_ADDR && create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto sync_out;
+ allocated = true;
+ blkaddr = dn.data_blkaddr;
+ }
/* Give more consecutive addresses for the read ahead */
- for (i = 0; i < end_offset - dn.ofs_in_node; i++)
- if (((datablock_addr(dn.node_page,
- dn.ofs_in_node + i))
- != (dn.data_blkaddr + i)) || maxblocks == i)
- break;
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
- bh_result->b_size = (i << blkbits);
+ if (blkaddr == (bh_result->b_blocknr + ofs)) {
+ ofs++;
+ dn.ofs_in_node++;
+ pgofs++;
+ bh_result->b_size += (((size_t)1) << blkbits);
+ goto get_next;
+ }
}
+sync_out:
+ if (allocated)
+ sync_inode_page(&dn);
+put_out:
f2fs_put_dnode(&dn);
- trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
- return 0;
+unlock_out:
+ if (create)
+ f2fs_unlock_op(sbi);
+out:
+ trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+ return err;
+}
+
+static int get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ return generic_block_fiemap(inode, fieinfo,
+ start, len, get_data_block_fiemap);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- return mpage_readpage(page, get_data_block_ro);
+ struct inode *inode = page->mapping->host;
+ int ret;
+
+ trace_f2fs_readpage(page, DATA);
+
+ /* If the file has inline data, try to read it directlly */
+ if (f2fs_has_inline_data(inode))
+ ret = f2fs_read_inline_data(inode, page);
+ else
+ ret = mpage_readpage(page, get_data_block);
+
+ return ret;
}
static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+ struct inode *inode = file->f_mapping->host;
+
+ /* If the file has inline data, skip readpages */
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}
-int do_write_data_page(struct page *page)
+int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
struct inode *inode = page->mapping->host;
- block_t old_blk_addr, new_blk_addr;
+ block_t old_blkaddr, new_blkaddr;
struct dnode_of_data dn;
int err = 0;
@@ -492,10 +768,10 @@ int do_write_data_page(struct page *page)
if (err)
return err;
- old_blk_addr = dn.data_blkaddr;
+ old_blkaddr = dn.data_blkaddr;
/* This page is already truncated */
- if (old_blk_addr == NULL_ADDR)
+ if (old_blkaddr == NULL_ADDR)
goto out_writepage;
set_page_writeback(page);
@@ -504,15 +780,13 @@ int do_write_data_page(struct page *page)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(old_blk_addr != NEW_ADDR &&
+ if (unlikely(old_blkaddr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(F2FS_SB(inode->i_sb), page,
- old_blk_addr);
+ rewrite_data_page(page, old_blkaddr, fio);
} else {
- write_data_page(inode, page, &dn,
- old_blk_addr, &new_blk_addr);
- update_extent_cache(new_blk_addr, &dn);
+ write_data_page(page, &dn, &new_blkaddr, fio);
+ update_extent_cache(new_blkaddr, &dn);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -527,9 +801,15 @@ static int f2fs_write_data_page(struct page *page,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long) i_size)
>> PAGE_CACHE_SHIFT;
- unsigned offset;
+ unsigned offset = 0;
bool need_balance_fs = false;
int err = 0;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
+
+ trace_f2fs_writepage(page, DATA);
if (page->index < end_index)
goto write;
@@ -539,55 +819,50 @@ static int f2fs_write_data_page(struct page *page,
* this page does not have to be written to disk.
*/
offset = i_size & (PAGE_CACHE_SIZE - 1);
- if ((page->index >= end_index + 1) || !offset) {
- if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
- inode_dec_dirty_dents(inode);
- }
+ if ((page->index >= end_index + 1) || !offset)
goto out;
- }
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
- if (sbi->por_doing) {
- err = AOP_WRITEPAGE_ACTIVATE;
+ if (unlikely(sbi->por_doing))
goto redirty_out;
- }
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
- inode_dec_dirty_dents(inode);
- err = do_write_data_page(page);
- } else {
- int ilock = mutex_lock_op(sbi);
- err = do_write_data_page(page);
- mutex_unlock_op(sbi, ilock);
- need_balance_fs = true;
+ err = do_write_data_page(page, &fio);
+ goto done;
}
- if (err == -ENOENT)
- goto out;
- else if (err)
+
+ if (!wbc->for_reclaim)
+ need_balance_fs = true;
+ else if (has_not_enough_free_secs(sbi, 0))
goto redirty_out;
- if (wbc->for_reclaim)
- f2fs_submit_bio(sbi, DATA, true);
+ f2fs_lock_op(sbi);
+ if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
+ err = f2fs_write_inline_data(inode, page, offset);
+ else
+ err = do_write_data_page(page, &fio);
+ f2fs_unlock_op(sbi);
+done:
+ if (err && err != -ENOENT)
+ goto redirty_out;
clear_cold_data(page);
out:
+ inode_dec_dirty_dents(inode);
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
return 0;
redirty_out:
- wbc->pages_skipped++;
- set_page_dirty(page);
- return err;
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
-#define MAX_DESIRED_PAGES_WP 4096
-
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -604,17 +879,20 @@ static int f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
bool locked = false;
int ret;
- long excess_nrtw = 0, desired_nrtw;
+ long diff;
+
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
- if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
- desired_nrtw = MAX_DESIRED_PAGES_WP;
- excess_nrtw = desired_nrtw - wbc->nr_to_write;
- wbc->nr_to_write = desired_nrtw;
- }
+ if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
+ available_free_memory(sbi, DIRTY_DENTS))
+ goto skip_write;
+
+ diff = nr_pages_to_write(sbi, DATA, wbc);
if (!S_ISDIR(inode->i_mode)) {
mutex_lock(&sbi->writepages);
@@ -623,12 +901,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
if (locked)
mutex_unlock(&sbi->writepages);
- f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
+
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_dir_inode(inode);
- wbc->nr_to_write -= excess_nrtw;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
+
+skip_write:
+ wbc->pages_skipped += get_dirty_dents(inode);
+ return 0;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -641,30 +924,44 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
- int ilock;
+
+ trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
repeat:
+ err = f2fs_convert_inline_data(inode, pos + len);
+ if (err)
+ return err;
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
+
+ /* to avoid latency during memory pressure */
+ unlock_page(page);
+
*pagep = page;
- ilock = mutex_lock_op(sbi);
+ if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
+ goto inline_data;
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, ALLOC_NODE);
- if (err)
- goto err;
+ err = f2fs_reserve_block(&dn, index);
+ f2fs_unlock_op(sbi);
- if (dn.data_blkaddr == NULL_ADDR)
- err = reserve_new_block(&dn);
-
- f2fs_put_dnode(&dn);
- if (err)
- goto err;
+ if (err) {
+ f2fs_put_page(page, 0);
+ return err;
+ }
+inline_data:
+ lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
- mutex_unlock_op(sbi, ilock);
+ f2fs_wait_on_page_writeback(page, DATA);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
@@ -681,15 +978,25 @@ repeat:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
- if (err)
- return err;
+ if (f2fs_has_inline_data(inode)) {
+ err = f2fs_read_inline_data(inode, page);
+ if (err) {
+ page_cache_release(page);
+ return err;
+ }
+ } else {
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
+ if (err)
+ return err;
+ }
+
lock_page(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return -EIO;
}
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
@@ -698,11 +1005,6 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
-
-err:
- mutex_unlock_op(sbi, ilock);
- f2fs_put_page(page, 1);
- return err;
}
static int f2fs_write_end(struct file *file,
@@ -712,6 +1014,8 @@ static int f2fs_write_end(struct file *file,
{
struct inode *inode = page->mapping->host;
+ trace_f2fs_write_end(inode, pos, len, copied);
+
SetPageUptodate(page);
set_page_dirty(page);
@@ -721,34 +1025,53 @@ static int f2fs_write_end(struct file *file,
update_inode_page(inode);
}
- unlock_page(page);
- page_cache_release(page);
+ f2fs_put_page(page, 1);
return copied;
}
+static int check_direct_IO(struct inode *inode, int rw,
+ struct iov_iter *iter, loff_t offset)
+{
+ unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+
+ if (rw == READ)
+ return 0;
+
+ if (offset & blocksize_mask)
+ return -EINVAL;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ return -EINVAL;
+
+ return 0;
+}
+
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+ struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- if (rw == WRITE)
+ /* Let buffer I/O handle the inline data case. */
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ if (check_direct_IO(inode, rw, iter, offset))
return 0;
- /* Needs synchronization with the cleaner */
- return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
- get_data_block_ro);
+ /* clear fsync mark to recover these blocks */
+ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
+
+ return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+ get_data_block);
}
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
unsigned int length)
{
struct inode *inode = page->mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ if (PageDirty(page))
inode_dec_dirty_dents(inode);
- }
ClearPagePrivate(page);
}
@@ -763,7 +1086,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
+ trace_f2fs_set_page_dirty(page, DATA);
+
SetPageUptodate(page);
+ mark_inode_dirty(inode);
+
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
set_dirty_dir_page(inode, page);
@@ -774,7 +1101,12 @@ static int f2fs_set_data_page_dirty(struct page *page)
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
- return generic_block_bmap(mapping, block, get_data_block_ro);
+ struct inode *inode = mapping->host;
+
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ return generic_block_bmap(mapping, block, get_data_block);
}
const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a84b0a8e685..b52c12cf587 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -24,7 +24,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static struct dentry *debugfs_root;
+static struct dentry *f2fs_debugfs_root;
static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
@@ -45,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
+ si->inline_inode = sbi->inline_inode;
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = sbi->node_inode->i_mapping->nrpages;
- si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
@@ -85,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
- struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, vblocks;
int ndirty = 0;
@@ -93,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
hblks_per_sec = blks_per_sec / 2;
- mutex_lock(&sit_i->sentry_lock);
for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
dist = abs(vblocks - hblks_per_sec);
@@ -104,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
ndirty++;
}
}
- mutex_unlock(&sit_i->sentry_lock);
dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
si->bimodal = bimodal / dist;
if (si->dirty_count)
@@ -165,9 +163,9 @@ get_cache:
/* free nids */
si->cache_mem = NM_I(sbi)->fcnt;
si->cache_mem += NM_I(sbi)->nat_cnt;
- npages = sbi->node_inode->i_mapping->nrpages;
+ npages = NODE_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- npages = sbi->meta_inode->i_mapping->nrpages;
+ npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
@@ -200,6 +198,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "Other: %u)\n - Data: %u\n",
si->valid_node_count - si->valid_inode_count,
si->valid_count - si->valid_node_count);
+ seq_printf(s, " - Inline_data Inode: %u\n",
+ si->inline_inode);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -233,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
+ seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -242,17 +243,17 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - node blocks : %d\n", si->node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
- seq_printf(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - nodes %4d in %4d\n",
+ seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
- seq_printf(s, " - dents %4d in dirs:%4d\n",
+ seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
- seq_printf(s, " - meta %4d in %4d\n",
+ seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
- seq_printf(s, " - NATs %5d > %lu\n",
- si->nats, NM_WOUT_THRESHOLD);
- seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
- si->sits, si->fnids);
+ seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
+ si->nats, si->sits);
+ seq_printf(s, " - free_nids: %9d\n",
+ si->fnids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
@@ -340,14 +341,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
void __init f2fs_create_root_stats(void)
{
- debugfs_root = debugfs_create_dir("f2fs", NULL);
- if (debugfs_root)
- debugfs_create_file("status", S_IRUGO, debugfs_root,
- NULL, &stat_fops);
+ struct dentry *file;
+
+ f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
+ if (!f2fs_debugfs_root)
+ goto bail;
+
+ file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
+ NULL, &stat_fops);
+ if (!file)
+ goto free_debugfs_dir;
+
+ return;
+
+free_debugfs_dir:
+ debugfs_remove(f2fs_debugfs_root);
+
+bail:
+ f2fs_debugfs_root = NULL;
+ return;
}
void f2fs_destroy_root_stats(void)
{
- debugfs_remove_recursive(debugfs_root);
- debugfs_root = NULL;
+ if (!f2fs_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 384c6daf9a8..a4addd72ebb 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
>> PAGE_CACHE_SHIFT;
}
-static unsigned int dir_buckets(unsigned int level)
+static unsigned int dir_buckets(unsigned int level, int dir_level)
{
- if (level < MAX_DIR_HASH_DEPTH / 2)
- return 1 << level;
+ if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
+ return 1 << (level + dir_level);
else
- return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+ return MAX_DIR_BUCKETS;
}
static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
-static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+static unsigned long dir_block_index(unsigned int level,
+ int dir_level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;
for (i = 0; i < level; i++)
- bidx += dir_buckets(i) * bucket_blocks(i);
+ bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
- unsigned long bit_pos, end_pos, next_pos;
+ unsigned long bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- int slots;
+ const void *dentry_bits = &dentry_blk->dentry_bitmap;
+ int max_len = 0;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
+ if (!test_bit_le(bit_pos, dentry_bits)) {
+ if (bit_pos == 0)
+ max_len = 1;
+ else if (!test_bit_le(bit_pos - 1, dentry_bits))
+ max_len++;
+ bit_pos++;
+ continue;
+ }
de = &dentry_blk->dentry[bit_pos];
- slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
goto found;
}
}
- next_pos = bit_pos + slots;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, next_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- end_pos = NR_DENTRY_IN_BLOCK;
- else
- end_pos = bit_pos;
- if (*max_slots < end_pos - next_pos)
- *max_slots = end_pos - next_pos;
+ if (max_len > *max_slots) {
+ *max_slots = max_len;
+ max_len = 0;
+ }
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
kunmap(dentry_page);
found:
+ if (max_len > *max_slots)
+ *max_slots = max_len;
return de;
}
@@ -139,12 +143,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bool room = false;
int max_slots = 0;
- BUG_ON(level > MAX_DIR_HASH_DEPTH);
+ f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
@@ -190,9 +195,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
- if (namelen > F2FS_NAME_LEN)
- return NULL;
-
if (npages == 0)
return NULL;
@@ -251,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
kunmap(page);
@@ -259,20 +261,19 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
- /* update parent inode number before releasing dentry page */
- F2FS_I(inode)->i_pino = dir->i_ino;
-
f2fs_put_page(page, 1);
}
static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
- struct f2fs_node *rn;
+ struct f2fs_inode *ri;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
/* copy name info. to this inode page */
- rn = F2FS_NODE(ipage);
- rn->i.i_namelen = cpu_to_le32(name->len);
- memcpy(rn->i.i_name, name->name, name->len);
+ ri = F2FS_INODE(ipage);
+ ri->i_namelen = cpu_to_le32(name->len);
+ memcpy(ri->i_name, name->name, name->len);
set_page_dirty(ipage);
}
@@ -346,21 +347,18 @@ static struct page *init_inode_metadata(struct inode *inode,
goto error;
}
- err = f2fs_init_acl(inode, dir);
+ err = f2fs_init_acl(inode, dir, page);
if (err)
- goto error;
+ goto put_error;
err = f2fs_init_security(inode, dir, name, page);
if (err)
- goto error;
-
- wait_on_page_writeback(page);
+ goto put_error;
} else {
page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
if (IS_ERR(page))
return page;
- wait_on_page_writeback(page);
set_cold_node(inode, page);
}
@@ -376,8 +374,13 @@ static struct page *init_inode_metadata(struct inode *inode,
}
return page;
-error:
+put_error:
f2fs_put_page(page, 1);
+error:
+ /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
+ truncate_inode_pages(&inode->i_data, 0);
+ truncate_blocks(inode, 0);
+ remove_dirty_dir_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
}
@@ -393,16 +396,13 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(dir);
+
if (F2FS_I(dir)->i_current_depth != current_depth) {
F2FS_I(dir)->i_current_depth = current_depth;
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
- if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
- update_inode_page(dir);
- else
- mark_inode_dirty(dir);
-
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
@@ -432,10 +432,11 @@ next:
}
/*
- * Caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
*/
-int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
{
unsigned int bit_pos;
unsigned int level;
@@ -461,17 +462,18 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
}
start:
- if (current_depth == MAX_DIR_HASH_DEPTH)
+ if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
/* Increase the depth, if required */
if (level == current_depth)
++current_depth;
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ (le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -491,8 +493,9 @@ start:
++level;
goto start;
add_dentry:
- wait_on_page_writeback(dentry_page);
+ f2fs_wait_on_page_writeback(dentry_page, DATA);
+ down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -515,7 +518,12 @@ add_dentry:
update_parent_metadata(dir, inode, current_depth);
fail:
- clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode_page(dir);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
return err;
@@ -532,13 +540,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
unsigned int bit_pos;
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
void *kaddr = page_address(page);
int i;
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -554,20 +561,22 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- if (inode && S_ISDIR(inode->i_mode)) {
- drop_nlink(dir);
- update_inode_page(dir);
- } else {
- mark_inode_dirty(dir);
- }
-
if (inode) {
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(dir);
+ update_inode_page(dir);
+ }
inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(inode);
i_size_write(inode, 0);
}
+ up_write(&F2FS_I(inode)->i_sem);
update_inode_page(inode);
if (inode->i_nlink == 0)
@@ -580,7 +589,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(dir);
}
f2fs_put_page(page, 1);
@@ -631,12 +639,18 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
+ struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
unsigned char d_type = DT_UNKNOWN;
bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
- for ( ; n < npages; n++) {
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
+
+ for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
if (IS_ERR(dentry_page))
continue;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 608f0df5b91..58df97e174d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,6 +18,15 @@
#include <linux/crc32.h>
#include <linux/magic.h>
#include <linux/kobject.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_F2FS_CHECK_FS
+#define f2fs_bug_on(condition) BUG_ON(condition)
+#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
+#else
+#define f2fs_bug_on(condition)
+#define f2fs_down_write(x, y) down_write(x)
+#endif
/*
* For mount options
@@ -30,6 +39,8 @@
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
+#define F2FS_MOUNT_INLINE_DATA 0x00000100
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -78,6 +89,16 @@ enum {
SIT_BITMAP
};
+/*
+ * For CP/NAT/SIT/SSA readahead
+ */
+enum {
+ META_CP,
+ META_NAT,
+ META_SIT,
+ META_SSA
+};
+
/* for the list of orphan inodes */
struct orphan_inode_entry {
struct list_head list; /* list head */
@@ -90,6 +111,13 @@ struct dir_inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+/* for the list of blockaddresses to be discarded */
+struct discard_entry {
+ struct list_head list; /* list head */
+ block_t blkaddr; /* block address to be discarded */
+ int len; /* # of consecutive blocks of the discard */
+};
+
/* for the list of fsync inodes, used only during recovery */
struct fsync_inode_entry {
struct list_head list; /* list head */
@@ -148,13 +176,17 @@ enum {
LOOKUP_NODE, /* look up a node without readahead */
LOOKUP_NODE_RA, /*
* look up a node with readahead called
- * by get_datablock_ro.
+ * by get_data_block.
*/
};
#define F2FS_LINK_MAX 32000 /* maximum link count per file */
+#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
+
/* for in-memory extent cache entry */
+#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
+
struct extent_info {
rwlock_t ext_lock; /* rwlock for consistency */
unsigned int fofs; /* start offset in a file */
@@ -168,22 +200,27 @@ struct extent_info {
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02
+#define DEF_DIR_LEVEL 0
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
+ unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* use only in directory structure */
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
+ struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_dents; /* # of dirty dentry pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
+ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -209,7 +246,9 @@ static inline void set_raw_extent(struct extent_info *ext,
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
+ nid_t available_nids; /* maximum available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
+ unsigned int ram_thresh; /* control the memory footprint */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -219,6 +258,7 @@ struct f2fs_nm_info {
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
/* free node ids management */
+ struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
@@ -281,15 +321,27 @@ enum {
NO_CHECK_TYPE
};
+struct flush_cmd {
+ struct flush_cmd *next;
+ struct completion wait;
+ int ret;
+};
+
+struct flush_cmd_control {
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct flush_cmd *issue_list; /* list for command issue */
+ struct flush_cmd *dispatch_list; /* list for command dispatch */
+ spinlock_t issue_lock; /* for issue list lock */
+ struct flush_cmd *issue_tail; /* list tail of issue list */
+};
+
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct list_head wblist_head; /* list of under-writeback pages */
- spinlock_t wblist_lock; /* lock for checkpoint */
-
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -298,6 +350,21 @@ struct f2fs_sm_info {
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
unsigned int ovp_segments; /* # of overprovision segments */
+
+ /* a threshold to reclaim prefree segments */
+ unsigned int rec_prefree_segments;
+
+ /* for small discard management */
+ struct list_head discard_list; /* 4KB discard list */
+ int nr_discards; /* # of discards in the list */
+ int max_discards; /* max. discards to be issued */
+
+ unsigned int ipu_policy; /* in-place-update policy */
+ unsigned int min_ipu_util; /* in-place-update threshold */
+
+ /* for flush command control */
+ struct flush_cmd_control *cmd_control_info;
+
};
/*
@@ -318,14 +385,6 @@ enum count_type {
};
/*
- * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
- * The checkpoint procedure blocks all the locks in this fs_lock array.
- * Some FS operations grab free locks, and if there is no free lock,
- * then wait to grab a lock in a round-robin manner.
- */
-#define NR_GLOBAL_LOCKS 8
-
-/*
* The below are the page types of bios used in submti_bio().
* The available types are:
* DATA User data pages. It operates as async mode.
@@ -336,6 +395,7 @@ enum count_type {
* with waiting the bio's completion
* ... Only can be used with META.
*/
+#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
enum page_type {
DATA,
NODE,
@@ -344,6 +404,20 @@ enum page_type {
META_FLUSH,
};
+struct f2fs_io_info {
+ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
+ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
+};
+
+#define is_read_io(rw) (((rw) & 1) == READ)
+struct f2fs_bio_info {
+ struct f2fs_sb_info *sbi; /* f2fs superblock */
+ struct bio *bio; /* bios to merge */
+ sector_t last_block_in_bio; /* last block number */
+ struct f2fs_io_info fio; /* store buffered io info. */
+ struct rw_semaphore io_rwsem; /* blocking op for bio */
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -357,25 +431,27 @@ struct f2fs_sb_info {
/* for segment-related operations */
struct f2fs_sm_info *sm_info; /* segment manager */
- struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */
- sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */
- struct rw_semaphore bio_sem; /* IO semaphore */
+
+ /* for bio operations */
+ struct f2fs_bio_info read_io; /* for read bios */
+ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
+ struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
- struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */
+ struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct mutex node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
- unsigned char next_lock_num; /* round-robin global locks */
- int por_doing; /* recovery is doing or not */
- int on_build_free_nids; /* build_free_nids is doing */
+ bool por_doing; /* recovery is doing or not */
+ wait_queue_head_t cp_wait;
/* for orphan inode management */
struct list_head orphan_inode_list; /* orphan inode list */
- struct mutex orphan_inode_mutex; /* for orphan inode list */
+ spinlock_t orphan_inode_lock; /* for orphan inode list */
unsigned int n_orphans; /* # of orphan inodes */
+ unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
struct list_head dir_inode_list; /* dir inode list */
@@ -397,6 +473,7 @@ struct f2fs_sb_info {
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
int active_logs; /* # of active logs */
+ int dir_level; /* directory level */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
@@ -412,6 +489,9 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
+ /* maximum # of trials to find a victim segment for SSR and GC */
+ unsigned int max_victim_search;
+
/*
* for stat information.
* one is for the LFS mode, and the other is for the SSR mode.
@@ -421,6 +501,7 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
+ int inline_inode; /* # of inline_data inodes */
int bg_gc; /* background gc calls */
unsigned int n_dirty_dirs; /* # of dir inodes */
#endif
@@ -460,6 +541,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page)
return (struct f2fs_node *)page_address(page);
}
+static inline struct f2fs_inode *F2FS_INODE(struct page *page)
+{
+ return &((struct f2fs_node *)page_address(page))->i;
+}
+
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
{
return (struct f2fs_nm_info *)(sbi->nm_info);
@@ -485,6 +571,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
}
+static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->meta_inode->i_mapping;
+}
+
+static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->node_inode->i_mapping;
+}
+
static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
{
sbi->s_dirty = 1;
@@ -520,48 +616,24 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
cp->ckpt_flags = cpu_to_le32(ckpt_flags);
}
-static inline void mutex_lock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- int i;
-
- for (i = 0; i < NR_GLOBAL_LOCKS; i++) {
- /*
- * This is the only time we take multiple fs_lock[]
- * instances; the order is immaterial since we
- * always hold cp_mutex, which serializes multiple
- * such operations.
- */
- mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex);
- }
+ down_read(&sbi->cp_rwsem);
}
-static inline void mutex_unlock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- int i = 0;
- for (; i < NR_GLOBAL_LOCKS; i++)
- mutex_unlock(&sbi->fs_lock[i]);
+ up_read(&sbi->cp_rwsem);
}
-static inline int mutex_lock_op(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS;
- int i = 0;
-
- for (; i < NR_GLOBAL_LOCKS; i++)
- if (mutex_trylock(&sbi->fs_lock[i]))
- return i;
-
- mutex_lock(&sbi->fs_lock[next_lock]);
- sbi->next_lock_num++;
- return next_lock;
+ f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
}
-static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
+static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
- if (ilock < 0)
- return;
- BUG_ON(ilock >= NR_GLOBAL_LOCKS);
- mutex_unlock(&sbi->fs_lock[ilock]);
+ up_write(&sbi->cp_rwsem);
}
/*
@@ -569,8 +641,9 @@ static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
*/
static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
- WARN_ON((nid >= NM_I(sbi)->max_nid));
- if (nid >= NM_I(sbi)->max_nid)
+ if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+ return -EINVAL;
+ if (unlikely(nid >= NM_I(sbi)->max_nid))
return -EINVAL;
return 0;
}
@@ -583,9 +656,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
{
if (F2FS_I(inode)->i_xattr_nid)
- return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
else
- return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+}
+
+static inline bool f2fs_has_xattr_block(unsigned int ofs)
+{
+ return ofs == XATTR_NODE_OFFSET;
}
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
@@ -596,7 +674,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
valid_block_count =
sbi->total_valid_block_count + (block_t)count;
- if (valid_block_count > sbi->user_block_count) {
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
@@ -607,17 +685,16 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
return true;
}
-static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
+static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
blkcnt_t count)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_block_count < (block_t) count);
- BUG_ON(inode->i_blocks < count);
+ f2fs_bug_on(sbi->total_valid_block_count < (block_t) count);
+ f2fs_bug_on(inode->i_blocks < count);
inode->i_blocks -= count;
sbi->total_valid_block_count -= (block_t)count;
spin_unlock(&sbi->stat_lock);
- return 0;
}
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -628,6 +705,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_dents(struct inode *inode)
{
+ inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_inc(&F2FS_I(inode)->dirty_dents);
}
@@ -638,6 +716,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_dents(struct inode *inode)
{
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_dec(&F2FS_I(inode)->dirty_dents);
}
@@ -646,6 +728,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
return atomic_read(&sbi->nr_pages[count_type]);
}
+static inline int get_dirty_dents(struct inode *inode)
+{
+ return atomic_read(&F2FS_I(inode)->dirty_dents);
+}
+
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -656,11 +743,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
{
- block_t ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_block_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_block_count;
}
static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -679,9 +762,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- int offset = (flag == NAT_BITMAP) ?
+ int offset;
+
+ if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+ if (flag == NAT_BITMAP)
+ return &ckpt->sit_nat_version_bitmap;
+ else
+ return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ } else {
+ offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ return &ckpt->sit_nat_version_bitmap + offset;
+ }
}
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
@@ -708,96 +800,85 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
}
static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode,
- unsigned int count)
+ struct inode *inode)
{
block_t valid_block_count;
unsigned int valid_node_count;
spin_lock(&sbi->stat_lock);
- valid_block_count = sbi->total_valid_block_count + (block_t)count;
- sbi->alloc_valid_block_count += (block_t)count;
- valid_node_count = sbi->total_valid_node_count + count;
-
- if (valid_block_count > sbi->user_block_count) {
+ valid_block_count = sbi->total_valid_block_count + 1;
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
- if (valid_node_count > sbi->total_node_count) {
+ valid_node_count = sbi->total_valid_node_count + 1;
+ if (unlikely(valid_node_count > sbi->total_node_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
if (inode)
- inode->i_blocks += count;
- sbi->total_valid_node_count = valid_node_count;
- sbi->total_valid_block_count = valid_block_count;
+ inode->i_blocks++;
+
+ sbi->alloc_valid_block_count++;
+ sbi->total_valid_node_count++;
+ sbi->total_valid_block_count++;
spin_unlock(&sbi->stat_lock);
return true;
}
static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode,
- unsigned int count)
+ struct inode *inode)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_block_count < count);
- BUG_ON(sbi->total_valid_node_count < count);
- BUG_ON(inode->i_blocks < count);
+ f2fs_bug_on(!sbi->total_valid_block_count);
+ f2fs_bug_on(!sbi->total_valid_node_count);
+ f2fs_bug_on(!inode->i_blocks);
- inode->i_blocks -= count;
- sbi->total_valid_node_count -= count;
- sbi->total_valid_block_count -= (block_t)count;
+ inode->i_blocks--;
+ sbi->total_valid_node_count--;
+ sbi->total_valid_block_count--;
spin_unlock(&sbi->stat_lock);
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_node_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_node_count;
}
static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
+ f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);
sbi->total_valid_inode_count++;
spin_unlock(&sbi->stat_lock);
}
-static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
+static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(!sbi->total_valid_inode_count);
+ f2fs_bug_on(!sbi->total_valid_inode_count);
sbi->total_valid_inode_count--;
spin_unlock(&sbi->stat_lock);
- return 0;
}
static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_inode_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_inode_count;
}
static inline void f2fs_put_page(struct page *page, int unlock)
{
- if (!page || IS_ERR(page))
+ if (!page)
return;
if (unlock) {
- BUG_ON(!PageLocked(page));
+ f2fs_bug_on(!PageLocked(page));
unlock_page(page);
}
page_cache_release(page);
@@ -814,9 +895,23 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
}
static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
- size_t size, void (*ctor)(void *))
+ size_t size)
{
- return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
+ return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
+}
+
+static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
+{
+ void *entry;
+retry:
+ entry = kmem_cache_alloc(cachep, flags);
+ if (!entry) {
+ cond_resched();
+ goto retry;
+ }
+
+ return entry;
}
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
@@ -879,12 +974,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
FI_UPDATE_DIR, /* should update inode block for consistency */
FI_DELAY_IPUT, /* used for the recovery */
+ FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -922,6 +1020,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
{
if (ri->i_inline & F2FS_INLINE_XATTR)
set_inode_flag(fi, FI_INLINE_XATTR);
+ if (ri->i_inline & F2FS_INLINE_DATA)
+ set_inode_flag(fi, FI_INLINE_DATA);
}
static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -931,41 +1031,75 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
if (is_inode_flag_set(fi, FI_INLINE_XATTR))
ri->i_inline |= F2FS_INLINE_XATTR;
+ if (is_inode_flag_set(fi, FI_INLINE_DATA))
+ ri->i_inline |= F2FS_INLINE_DATA;
+}
+
+static inline int f2fs_has_inline_xattr(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
}
static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
{
- if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ if (f2fs_has_inline_xattr(&fi->vfs_inode))
return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
return DEF_ADDRS_PER_INODE;
}
static inline void *inline_xattr_addr(struct page *page)
{
- struct f2fs_inode *ri;
- ri = (struct f2fs_inode *)page_address(page);
+ struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
F2FS_INLINE_XATTR_ADDRS]);
}
static inline int inline_xattr_size(struct inode *inode)
{
- if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR))
+ if (f2fs_has_inline_xattr(inode))
return F2FS_INLINE_XATTR_ADDRS << 2;
else
return 0;
}
+static inline int f2fs_has_inline_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
+}
+
+static inline void *inline_data_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
}
+static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
+{
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+}
+
+#define get_inode_mode(i) \
+ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/* get offset of first page in next direct node */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
+ ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
+ (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
/*
* file.c
*/
int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *);
+int truncate_blocks(struct inode *, u64);
void f2fs_truncate(struct inode *);
int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *);
@@ -979,8 +1113,9 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
*/
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
+int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
-int update_inode_page(struct inode *);
+void update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
@@ -1028,12 +1163,16 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
struct dnode_of_data;
struct node_info;
+bool available_free_memory(struct f2fs_sb_info *, int);
int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
+void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
-int remove_inode_page(struct inode *);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
+void remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *, const struct qstr *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
@@ -1046,6 +1185,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
struct f2fs_summary_block *);
@@ -1059,24 +1199,30 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *);
+int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
+void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
-struct bio *f2fs_bio_alloc(struct block_device *, int);
-void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
void write_meta_page(struct f2fs_sb_info *, struct page *);
-void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
- block_t, block_t *);
-void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
- block_t, block_t *);
-void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
+void write_node_page(struct f2fs_sb_info *, struct page *,
+ struct f2fs_io_info *, unsigned int, block_t, block_t *);
+void write_data_page(struct page *, struct dnode_of_data *, block_t *,
+ struct f2fs_io_info *);
+void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void rewrite_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+ block_t, block_t *, struct f2fs_summary *, int);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type);
void write_data_summaries(struct f2fs_sb_info *, block_t);
void write_node_summaries(struct f2fs_sb_info *, block_t);
int lookup_journal_in_cursum(struct f2fs_summary_block *,
@@ -1084,23 +1230,25 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *,
void flush_sit_entries(struct f2fs_sb_info *);
int build_segment_manager(struct f2fs_sb_info *);
void destroy_segment_manager(struct f2fs_sb_info *);
+int __init create_segment_manager_caches(void);
+void destroy_segment_manager_caches(void);
/*
* checkpoint.c
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
-int recover_orphan_inodes(struct f2fs_sb_info *);
+void recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void set_dirty_dir_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
void init_orphan_info(struct f2fs_sb_info *);
@@ -1110,13 +1258,18 @@ void destroy_checkpoint_caches(void);
/*
* data.c
*/
+void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
+void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
+ struct f2fs_io_info *);
int reserve_new_block(struct dnode_of_data *);
+int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void update_extent_cache(block_t, struct dnode_of_data *);
struct page *find_data_page(struct inode *, pgoff_t, bool);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
-int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
-int do_write_data_page(struct page *);
+int do_write_data_page(struct page *, struct f2fs_io_info *);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
/*
* gc.c
@@ -1149,13 +1302,13 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int total_count, utilization;
- int bg_gc;
+ int bg_gc, inline_inode;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
- int prefree_count, call_count;
+ int prefree_count, call_count, cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int tot_blks, data_blks, node_blks;
int curseg[NR_CURSEG_TYPE];
@@ -1169,10 +1322,31 @@ struct f2fs_stat_info {
static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
{
- return (struct f2fs_stat_info*)sbi->stat_info;
+ return (struct f2fs_stat_info *)sbi->stat_info;
}
-#define stat_inc_call_count(si) ((si)->call_count++)
+#define stat_inc_cp_count(si) ((si)->cp_count++)
+#define stat_inc_call_count(si) ((si)->call_count++)
+#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
+#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
+#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
+#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
+#define stat_inc_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ ((F2FS_SB(inode->i_sb))->inline_inode++); \
+ } while (0)
+#define stat_dec_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ ((F2FS_SB(inode->i_sb))->inline_inode--); \
+ } while (0)
+
+#define stat_inc_seg_type(sbi, curseg) \
+ ((sbi)->segment_count[(curseg)->alloc_type]++)
+#define stat_inc_block_count(sbi, curseg) \
+ ((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_seg_count(sbi, type) \
do { \
@@ -1206,7 +1380,17 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
+#define stat_inc_cp_count(si)
#define stat_inc_call_count(si)
+#define stat_inc_bggc_count(si)
+#define stat_inc_dirty_dir(sbi)
+#define stat_dec_dirty_dir(sbi)
+#define stat_inc_total_hit(sb)
+#define stat_inc_read_hit(sb)
+#define stat_inc_inline_inode(inode)
+#define stat_dec_inline_inode(inode)
+#define stat_inc_seg_type(sbi, curseg)
+#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
#define stat_inc_tot_blk_count(si, blks)
#define stat_inc_data_blk_count(si, blks)
@@ -1227,4 +1411,14 @@ extern const struct address_space_operations f2fs_meta_aops;
extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
+
+/*
+ * inline.c
+ */
+bool f2fs_may_inline(struct inode *);
+int f2fs_read_inline_data(struct inode *, struct page *);
+int f2fs_convert_inline_data(struct inode *, pgoff_t);
+int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
+void truncate_inline_data(struct inode *, u64);
+int recover_inline_data(struct inode *, struct page *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 02c906971cc..7d8b9627509 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
+#include <linux/pagevec.h>
#include "f2fs.h"
#include "node.h"
@@ -33,41 +34,26 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- block_t old_blk_addr;
struct dnode_of_data dn;
- int err, ilock;
+ int err;
f2fs_balance_fs(sbi);
sb_start_pagefault(inode->i_sb);
/* block allocation */
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
- if (err) {
- mutex_unlock_op(sbi, ilock);
+ err = f2fs_reserve_block(&dn, page->index);
+ f2fs_unlock_op(sbi);
+ if (err)
goto out;
- }
-
- old_blk_addr = dn.data_blkaddr;
-
- if (old_blk_addr == NULL_ADDR) {
- err = reserve_new_block(&dn);
- if (err) {
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, ilock);
- goto out;
- }
- }
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, ilock);
file_update_time(vma->vm_file);
lock_page(page);
- if (page->mapping != inode->i_mapping ||
+ if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
- !PageUptodate(page)) {
+ !PageUptodate(page))) {
unlock_page(page);
err = -EFAULT;
goto out;
@@ -88,9 +74,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
set_page_dirty(page);
SetPageUptodate(page);
+ trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err);
@@ -98,6 +85,7 @@ out:
static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
@@ -125,6 +113,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int ret = 0;
bool need_cp = false;
@@ -134,7 +123,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
.for_reclaim = 0,
};
- if (f2fs_readonly(inode->i_sb))
+ if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
trace_f2fs_sync_file_enter(inode);
@@ -147,7 +136,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
- mutex_lock(&inode->i_mutex);
+ down_read(&fi->i_sem);
/*
* Both of fdatasync() and fsync() are able to be recovered from
@@ -164,40 +153,174 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
+ up_read(&fi->i_sem);
+
if (need_cp) {
nid_t pino;
- F2FS_I(inode)->xattr_ver = 0;
-
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
+
+ down_write(&fi->i_sem);
+ F2FS_I(inode)->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
F2FS_I(inode)->i_pino = pino;
file_got_pino(inode);
+ up_write(&fi->i_sem);
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
+ } else {
+ up_write(&fi->i_sem);
}
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
+ if (fsync_mark_done(sbi, inode->i_ino))
+ goto out;
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
}
- filemap_fdatawait_range(sbi->node_inode->i_mapping,
- 0, LONG_MAX);
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
+ if (ret)
+ goto out;
+ ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
- mutex_unlock(&inode->i_mutex);
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ pgoff_t pgofs, int whence)
+{
+ struct pagevec pvec;
+ int nr_pages;
+
+ if (whence != SEEK_DATA)
+ return 0;
+
+ /* find first dirty page index */
+ pagevec_init(&pvec, 0);
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ pagevec_release(&pvec);
+ return pgofs;
+}
+
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+ (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ return true;
+ break;
+ case SEEK_HOLE:
+ if (blkaddr == NULL_ADDR)
+ return true;
+ break;
+ }
+ return false;
+}
+
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+ struct dnode_of_data dn;
+ pgoff_t pgofs, end_offset, dirty;
+ loff_t data_ofs = offset;
+ loff_t isize;
+ int err = 0;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ goto fail;
+
+ /* handle inline data case */
+ if (f2fs_has_inline_data(inode)) {
+ if (whence == SEEK_HOLE)
+ data_ofs = isize;
+ goto found;
+ }
+
+ pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+ dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+ for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ if (err && err != -ENOENT) {
+ goto fail;
+ } else if (err == -ENOENT) {
+ /* direct node is not exist */
+ if (whence == SEEK_DATA) {
+ pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+ F2FS_I(inode));
+ continue;
+ } else {
+ goto found;
+ }
+ }
+
+ end_offset = IS_INODE(dn.node_page) ?
+ ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+
+ /* find data/hole in dnode block */
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, pgofs++,
+ data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ block_t blkaddr;
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+ }
+ f2fs_put_dnode(&dn);
+ }
+
+ if (whence == SEEK_DATA)
+ goto fail;
+found:
+ if (whence == SEEK_HOLE && data_ofs > isize)
+ data_ofs = isize;
+ mutex_unlock(&inode->i_mutex);
+ return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+}
+
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ maxbytes, i_size_read(inode));
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ return f2fs_seek_block(file, offset, whence);
+ }
+
+ return -EINVAL;
+}
+
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
@@ -215,7 +338,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + ofs;
- for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
+ for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
if (blkaddr == NULL_ADDR)
continue;
@@ -246,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
+ if (f2fs_has_inline_data(inode))
+ return truncate_inline_data(inode, from);
+
if (!offset)
return;
@@ -254,48 +380,48 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
return;
lock_page(page);
- if (page->mapping != inode->i_mapping) {
+ if (unlikely(page->mapping != inode->i_mapping)) {
f2fs_put_page(page, 1);
return;
}
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
-static int truncate_blocks(struct inode *inode, u64 from)
+int truncate_blocks(struct inode *inode, u64 from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blocksize = inode->i_sb->s_blocksize;
struct dnode_of_data dn;
pgoff_t free_from;
- int count = 0, ilock = -1;
- int err;
+ int count = 0, err = 0;
trace_f2fs_truncate_blocks_enter(inode, from);
+ if (f2fs_has_inline_data(inode))
+ goto done;
+
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
- if (IS_INODE(dn.node_page))
- count = ADDRS_PER_INODE(F2FS_I(inode));
- else
- count = ADDRS_PER_BLOCK;
+ count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
count -= dn.ofs_in_node;
- BUG_ON(count < 0);
+ f2fs_bug_on(count < 0);
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
truncate_data_blocks_range(&dn, count);
@@ -305,8 +431,8 @@ static int truncate_blocks(struct inode *inode, u64 from)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
- mutex_unlock_op(sbi, ilock);
-
+ f2fs_unlock_op(sbi);
+done:
/* lastly zero out the first data page */
truncate_partial_data_page(inode, from);
@@ -380,6 +506,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
+ err = f2fs_convert_inline_data(inode, attr->ia_size);
+ if (err)
+ return err;
+
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
f2fs_balance_fs(F2FS_SB(inode->i_sb));
@@ -388,7 +518,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
__setattr_copy(inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = f2fs_acl_chmod(inode);
+ err = posix_acl_chmod(inode, get_inode_mode(inode));
if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
clear_inode_flag(fi, FI_ACL_MODE);
@@ -403,12 +533,14 @@ const struct inode_operations f2fs_file_inode_operations = {
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
#endif
+ .fiemap = f2fs_fiemap,
};
static void fill_zero(struct inode *inode, pgoff_t index,
@@ -416,19 +548,18 @@ static void fill_zero(struct inode *inode, pgoff_t index,
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *page;
- int ilock;
if (!len)
return;
f2fs_balance_fs(sbi);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
page = get_new_data_page(inode, NULL, index, false);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (!IS_ERR(page)) {
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -458,12 +589,16 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
return 0;
}
-static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
+static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
int ret = 0;
+ ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
+ if (ret)
+ return ret;
+
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -484,7 +619,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
struct address_space *mapping = inode->i_mapping;
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int ilock;
f2fs_balance_fs(sbi);
@@ -493,18 +627,12 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
}
}
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- i_size_read(inode) <= (offset + len)) {
- i_size_write(inode, offset);
- mark_inode_dirty(inode);
- }
-
return ret;
}
@@ -521,35 +649,29 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
+ ret = f2fs_convert_inline_data(inode, offset + len);
+ if (ret)
+ return ret;
+
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
off_start = offset & (PAGE_CACHE_SIZE - 1);
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ f2fs_lock_op(sbi);
+
for (index = pg_start; index <= pg_end; index++) {
struct dnode_of_data dn;
- int ilock;
- ilock = mutex_lock_op(sbi);
+ if (index == pg_end && !off_end)
+ goto noalloc;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
- if (ret) {
- mutex_unlock_op(sbi, ilock);
+ ret = f2fs_reserve_block(&dn, index);
+ if (ret)
break;
- }
-
- if (dn.data_blkaddr == NULL_ADDR) {
- ret = reserve_new_block(&dn);
- if (ret) {
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, ilock);
- break;
- }
- }
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, ilock);
-
+noalloc:
if (pg_start == pg_end)
new_size = offset + len;
else if (index == pg_start && off_start)
@@ -564,7 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
i_size_read(inode) < new_size) {
i_size_write(inode, new_size);
mark_inode_dirty(inode);
+ update_inode_page(inode);
}
+ f2fs_unlock_op(sbi);
return ret;
}
@@ -578,8 +702,10 @@ static long f2fs_fallocate(struct file *file, int mode,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
+ mutex_lock(&inode->i_mutex);
+
if (mode & FALLOC_FL_PUNCH_HOLE)
- ret = punch_hole(inode, offset, len, mode);
+ ret = punch_hole(inode, offset, len);
else
ret = expand_inode_data(inode, offset, len, mode);
@@ -587,6 +713,9 @@ static long f2fs_fallocate(struct file *file, int mode,
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
+
+ mutex_unlock(&inode->i_mutex);
+
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
}
@@ -682,11 +811,11 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
#endif
const struct file_operations f2fs_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .llseek = f2fs_llseek,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.open = generic_file_open,
.mmap = f2fs_file_mmap,
.fsync = f2fs_sync_file,
@@ -696,5 +825,5 @@ const struct file_operations f2fs_file_operations = {
.compat_ioctl = f2fs_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2f157e88368..b90dbe55403 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,13 +77,15 @@ static int gc_thread_func(void *data)
else
wait_ms = increase_sleep_time(gc_th, wait_ms);
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->bg_gc++;
-#endif
+ stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi))
wait_ms = gc_th->no_gc_sleep_time;
+
+ /* balancing f2fs's metadata periodically */
+ f2fs_balance_fs_bg(sbi);
+
} while (!kthread_should_stop());
return 0;
}
@@ -117,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
kfree(gc_th);
sbi->gc_thread = NULL;
}
-
out:
return err;
}
@@ -162,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
- if (p->max_search > MAX_VICTIM_SEARCH)
- p->max_search = MAX_VICTIM_SEARCH;
+ if (p->max_search > sbi->max_victim_search)
+ p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
}
@@ -236,8 +237,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
-static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
- struct victim_sel_policy *p)
+static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct victim_sel_policy *p)
{
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
@@ -293,7 +294,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
break;
}
- p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
+
+ p.offset = segno + p.ofs_unit;
+ if (p.ofs_unit > 1)
+ p.offset -= segno % p.ofs_unit;
+
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
@@ -306,10 +311,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
- }
-
- if (cost == max_cost)
+ } else if (unlikely(cost == max_cost)) {
continue;
+ }
if (nsearched++ >= p.max_search) {
sbi->last_victim[p.gc_mode] = segno;
@@ -358,12 +362,8 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist)
iput(inode);
return;
}
-repeat:
- new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
- if (!new_ie) {
- cond_resched();
- goto repeat;
- }
+
+ new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
new_ie->inode = inode;
list_add_tail(&new_ie->list, ilist);
}
@@ -428,7 +428,7 @@ next_step:
/* set page dirty and write it */
if (gc_type == FG_GC) {
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE);
set_page_dirty(node_page);
} else {
if (!PageWriteback(node_page))
@@ -520,23 +520,23 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static void move_data_page(struct inode *inode, struct page *page, int gc_type)
{
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC,
+ };
+
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
set_page_dirty(page);
set_cold_data(page);
} else {
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA);
- if (clear_page_dirty_for_io(page) &&
- S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ if (clear_page_dirty_for_io(page))
inode_dec_dirty_dents(inode);
- }
set_cold_data(page);
- do_write_data_page(page);
+ do_write_data_page(page, &fio);
clear_cold_data(page);
}
out:
@@ -630,7 +630,7 @@ next_iput:
goto next_step;
if (gc_type == FG_GC) {
- f2fs_submit_bio(sbi, DATA, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
/*
* In the case of FG_GC, it'd be better to reclaim this victim
@@ -663,8 +663,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
/* read segment summary of victim */
sum_page = get_sum_page(sbi, segno);
- if (IS_ERR(sum_page))
- return;
blk_start_plug(&plug);
@@ -696,7 +694,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&ilist);
gc_more:
- if (!(sbi->sb->s_flags & MS_ACTIVE))
+ if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+ goto stop;
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
@@ -708,6 +708,11 @@ gc_more:
goto stop;
ret = 0;
+ /* readahead multi ssa blocks those have contiguous address */
+ if (sbi->segs_per_sec > 1)
+ ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
+ META_SSA);
+
for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &ilist, gc_type);
@@ -737,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
- sizeof(struct inode_entry), NULL);
+ sizeof(struct inode_entry));
if (!winode_slab)
return -ENOMEM;
return 0;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 507056d2220..5d5eb6047bf 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -20,7 +20,7 @@
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/* Search max. number of dirty segments to select a victim segment */
-#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */
+#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
new file mode 100644
index 00000000000..1bba5228c19
--- /dev/null
+++ b/fs/f2fs/inline.c
@@ -0,0 +1,250 @@
+/*
+ * fs/f2fs/inline.c
+ * Copyright (c) 2013, Intel Corporation
+ * Authors: Huajun Li <huajun.li@intel.com>
+ * Haicheng Li <haicheng.li@intel.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+
+bool f2fs_may_inline(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ block_t nr_blocks;
+ loff_t i_size;
+
+ if (!test_opt(sbi, INLINE_DATA))
+ return false;
+
+ nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
+ if (inode->i_blocks > nr_blocks)
+ return false;
+
+ i_size = i_size_read(inode);
+ if (i_size > MAX_INLINE_DATA)
+ return false;
+
+ return true;
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *ipage;
+ void *src_addr, *dst_addr;
+
+ if (page->index) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ goto out;
+ }
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ unlock_page(page);
+ return PTR_ERR(ipage);
+ }
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap(page);
+ f2fs_put_page(ipage, 1);
+
+out:
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ return 0;
+}
+
+static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
+{
+ int err;
+ struct page *ipage;
+ struct dnode_of_data dn;
+ void *src_addr, *dst_addr;
+ block_t new_blk_addr;
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC | REQ_PRIO,
+ };
+
+ f2fs_lock_op(sbi);
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ /*
+ * i_addr[0] is not used for inline data,
+ * so reserving new block will not destroy inline data
+ */
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, 0);
+ if (err)
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap(page);
+ SetPageUptodate(page);
+
+ /* write data page to try to make data consistent */
+ set_page_writeback(page);
+ write_data_page(page, &dn, &new_blk_addr, &fio);
+ update_extent_cache(new_blk_addr, &dn);
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ /* clear inline data and flag after data writeback */
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ stat_dec_inline_inode(inode);
+
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
+int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
+{
+ struct page *page;
+ int err;
+
+ if (!f2fs_has_inline_data(inode))
+ return 0;
+ else if (to_size <= MAX_INLINE_DATA)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ err = __f2fs_convert_inline_data(inode, page);
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_write_inline_data(struct inode *inode,
+ struct page *page, unsigned size)
+{
+ void *src_addr, *dst_addr;
+ struct page *ipage;
+ struct dnode_of_data dn;
+ int err;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ if (err)
+ return err;
+ ipage = dn.inode_page;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ src_addr = kmap(page);
+ dst_addr = inline_data_addr(ipage);
+ memcpy(dst_addr, src_addr, size);
+ kunmap(page);
+
+ /* Release the first data block if it is allocated */
+ if (!f2fs_has_inline_data(inode)) {
+ truncate_data_blocks_range(&dn, 1);
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ stat_inc_inline_inode(inode);
+ }
+
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+
+ return 0;
+}
+
+void truncate_inline_data(struct inode *inode, u64 from)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *ipage;
+
+ if (from >= MAX_INLINE_DATA)
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage))
+ return;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ set_page_dirty(ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+int recover_inline_data(struct inode *inode, struct page *npage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_inode *ri = NULL;
+ void *src_addr, *dst_addr;
+ struct page *ipage;
+
+ /*
+ * The inline_data recovery policy is as follows.
+ * [prev.] [next] of inline_data flag
+ * o o -> recover inline_data
+ * o x -> remove inline_data, and then recover data blocks
+ * x o -> remove inline_data, and then recover inline_data
+ * x x -> recover data blocks
+ */
+ if (IS_INODE(npage))
+ ri = F2FS_INODE(npage);
+
+ if (f2fs_has_inline_data(inode) &&
+ ri && ri->i_inline & F2FS_INLINE_DATA) {
+process_inline:
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ src_addr = inline_data_addr(npage);
+ dst_addr = inline_data_addr(ipage);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ return -1;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ } else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
+ truncate_blocks(inode, 0);
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ goto process_inline;
+ }
+ return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9339cd29204..2cf6962f6cc 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bitops.h>
#include "f2fs.h"
#include "node.h"
@@ -21,20 +22,49 @@
void f2fs_set_inode_flags(struct inode *inode)
{
unsigned int flags = F2FS_I(inode)->i_flags;
-
- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
- S_NOATIME | S_DIRSYNC);
+ unsigned int new_fl = 0;
if (flags & FS_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
+}
+
+static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ if (ri->i_addr[0])
+ inode->i_rdev =
+ old_decode_dev(le32_to_cpu(ri->i_addr[0]));
+ else
+ inode->i_rdev =
+ new_decode_dev(le32_to_cpu(ri->i_addr[1]));
+ }
+}
+
+static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ if (old_valid_dev(inode->i_rdev)) {
+ ri->i_addr[0] =
+ cpu_to_le32(old_encode_dev(inode->i_rdev));
+ ri->i_addr[1] = 0;
+ } else {
+ ri->i_addr[0] = 0;
+ ri->i_addr[1] =
+ cpu_to_le32(new_encode_dev(inode->i_rdev));
+ ri->i_addr[2] = 0;
+ }
+ }
}
static int do_read_inode(struct inode *inode)
@@ -42,13 +72,13 @@ static int do_read_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
- struct f2fs_node *rn;
struct f2fs_inode *ri;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
(unsigned long) inode->i_ino);
+ WARN_ON(1);
return -EINVAL;
}
@@ -56,8 +86,7 @@ static int do_read_inode(struct inode *inode)
if (IS_ERR(node_page))
return PTR_ERR(node_page);
- rn = F2FS_NODE(node_page);
- ri = &(rn->i);
+ ri = F2FS_INODE(node_page);
inode->i_mode = le16_to_cpu(ri->i_mode);
i_uid_write(inode, le32_to_cpu(ri->i_uid));
@@ -73,10 +102,6 @@ static int do_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
- if (ri->i_addr[0])
- inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
- else
- inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -84,8 +109,14 @@ static int do_read_inode(struct inode *inode)
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
+ fi->i_dir_level = ri->i_dir_level;
+
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
+
+ /* get rdev by using inline_info */
+ __get_inode_rdev(inode, ri);
+
f2fs_put_page(node_page, 1);
return 0;
}
@@ -149,13 +180,11 @@ bad_inode:
void update_inode(struct inode *inode, struct page *node_page)
{
- struct f2fs_node *rn;
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(node_page, NODE, false);
+ f2fs_wait_on_page_writeback(node_page, NODE);
- rn = F2FS_NODE(node_page);
- ri = &(rn->i);
+ ri = F2FS_INODE(node_page);
ri->i_mode = cpu_to_le16(inode->i_mode);
ri->i_advise = F2FS_I(inode)->i_advise;
@@ -178,43 +207,38 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
+ ri->i_dir_level = F2FS_I(inode)->i_dir_level;
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- if (old_valid_dev(inode->i_rdev)) {
- ri->i_addr[0] =
- cpu_to_le32(old_encode_dev(inode->i_rdev));
- ri->i_addr[1] = 0;
- } else {
- ri->i_addr[0] = 0;
- ri->i_addr[1] =
- cpu_to_le32(new_encode_dev(inode->i_rdev));
- ri->i_addr[2] = 0;
- }
- }
-
+ __set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
set_page_dirty(node_page);
+
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
}
-int update_inode_page(struct inode *inode)
+void update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *node_page;
-
+retry:
node_page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(node_page))
- return PTR_ERR(node_page);
-
+ if (IS_ERR(node_page)) {
+ int err = PTR_ERR(node_page);
+ if (err == -ENOMEM) {
+ cond_resched();
+ goto retry;
+ } else if (err != -ENOENT) {
+ f2fs_stop_checkpoint(sbi);
+ }
+ return;
+ }
update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
- return 0;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int ret, ilock;
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
@@ -227,14 +251,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to lock here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- ilock = mutex_lock_op(sbi);
- ret = update_inode_page(inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_lock_op(sbi);
+ update_inode_page(inode);
+ f2fs_unlock_op(sbi);
if (wbc)
f2fs_balance_fs(sbi);
- return ret;
+ return 0;
}
/*
@@ -243,16 +267,15 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int ilock;
trace_f2fs_evict_inode(inode);
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
goto no_delete;
- BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
+ f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
if (inode->i_nlink || is_bad_inode(inode))
@@ -265,11 +288,13 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
remove_inode_page(inode);
- mutex_unlock_op(sbi, ilock);
+ stat_dec_inline_inode(inode);
+ f2fs_unlock_op(sbi);
sb_end_intwrite(inode->i_sb);
no_delete:
clear_inode(inode);
+ invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2a5359c990f..a6bdddc33ce 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -27,32 +27,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_t ino;
struct inode *inode;
bool nid_free = false;
- int err, ilock;
+ int err;
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
if (!alloc_nid(sbi, &ino)) {
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
}
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
- inode->i_uid = current_fsuid();
-
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else {
- inode->i_gid = current_fsgid();
- }
+ inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
- inode->i_mode = mode;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = sbi->s_next_generation++;
@@ -115,7 +106,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
nid_t ino = 0;
- int err, ilock;
+ int err;
f2fs_balance_fs(sbi);
@@ -131,9 +122,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -157,7 +148,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = old_dentry->d_inode;
struct super_block *sb = dir->i_sb;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int err, ilock;
+ int err;
f2fs_balance_fs(sbi);
@@ -165,9 +156,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
ihold(inode);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -207,6 +198,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
+
+ stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
@@ -220,7 +213,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct f2fs_dir_entry *de;
struct page *page;
int err = -ENOENT;
- int ilock;
trace_f2fs_unlink_enter(dir, dentry);
f2fs_balance_fs(sbi);
@@ -229,16 +221,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
if (!de)
goto fail;
+ f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err) {
+ f2fs_unlock_op(sbi);
kunmap(page);
f2fs_put_page(page, 0);
goto fail;
}
-
- ilock = mutex_lock_op(sbi);
f2fs_delete_entry(de, page, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
@@ -254,7 +246,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
size_t symlen = strlen(symname) + 1;
- int err, ilock;
+ int err;
f2fs_balance_fs(sbi);
@@ -265,9 +257,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -290,7 +282,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
- int err, ilock;
+ int err;
f2fs_balance_fs(sbi);
@@ -304,9 +296,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (err)
goto out_fail;
@@ -342,7 +334,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
int err = 0;
- int ilock;
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -356,9 +347,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -387,7 +378,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
- int err = -ENOENT, ilock = -1;
+ int err = -ENOENT;
f2fs_balance_fs(sbi);
@@ -402,7 +393,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
- ilock = mutex_lock_op(sbi);
+ f2fs_lock_op(sbi);
if (new_inode) {
@@ -428,9 +419,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
+ down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
+ mark_inode_dirty(new_inode);
if (!new_inode->i_nlink)
add_orphan_inode(sbi, new_inode->i_ino);
@@ -450,6 +445,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
@@ -459,25 +458,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
+ update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
+ mark_inode_dirty(old_dir);
update_inode_page(old_dir);
}
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
return 0;
put_out_dir:
- f2fs_put_page(new_page, 1);
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
- mutex_unlock_op(sbi, ilock);
+ f2fs_unlock_op(sbi);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
@@ -498,6 +500,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -524,6 +527,7 @@ const struct inode_operations f2fs_special_inode_operations = {
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 51ef2789443..4b697ccc9b0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,35 @@
#include "segment.h"
#include <trace/events/f2fs.h>
+#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
+
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct sysinfo val;
+ unsigned long mem_size = 0;
+ bool res = false;
+
+ si_meminfo(&val);
+ /* give 25%, 25%, 50% memory for each components respectively */
+ if (type == FREE_NIDS) {
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == NAT_ENTRIES) {
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == DIRTY_DENTS) {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ }
+ return res;
+}
+
static void clear_node_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -82,40 +108,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
-/*
- * Readahead NAT pages
- */
-static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
-{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct blk_plug plug;
- struct page *page;
- pgoff_t index;
- int i;
-
- blk_start_plug(&plug);
-
- for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
- if (nid >= nm_i->max_nid)
- nid = 0;
- index = current_nat_addr(sbi, nid);
-
- page = grab_cache_page(mapping, index);
- if (!page)
- continue;
- if (PageUptodate(page)) {
- f2fs_put_page(page, 1);
- continue;
- }
- if (f2fs_readpage(sbi, page, index, READ))
- continue;
-
- f2fs_put_page(page, 0);
- }
- blk_finish_plug(&plug);
-}
-
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
@@ -149,6 +141,32 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
+bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool fsync_done = false;
+
+ read_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ fsync_done = e->fsync_done;
+ read_unlock(&nm_i->nat_tree_lock);
+ return fsync_done;
+}
+
+void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+
+ write_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ e->fsync_done = false;
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
@@ -162,6 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
}
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
+ new->checkpointed = true;
list_add_tail(&new->list, &nm_i->nat_entries);
nm_i->nat_cnt++;
return new;
@@ -180,16 +199,13 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
- nat_set_ino(e, le32_to_cpu(ne->ino));
- nat_set_version(e, ne->version);
- e->checkpointed = true;
+ node_info_from_raw_nat(&e->ni, ne);
}
write_unlock(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
- block_t new_blkaddr)
+ block_t new_blkaddr, bool fsync_done)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -203,8 +219,7 @@ retry:
goto retry;
}
e->ni = *ni;
- e->checkpointed = true;
- BUG_ON(ni->blk_addr == NEW_ADDR);
+ f2fs_bug_on(ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
* when nid is reallocated,
@@ -212,19 +227,16 @@ retry:
* So, reinitialize it with new information.
*/
e->ni = *ni;
- BUG_ON(ni->blk_addr != NULL_ADDR);
+ f2fs_bug_on(ni->blk_addr != NULL_ADDR);
}
- if (new_blkaddr == NEW_ADDR)
- e->checkpointed = false;
-
/* sanity check */
- BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
- BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
+ f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
new_blkaddr == NULL_ADDR);
- BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
nat_get_blkaddr(e) != NULL_ADDR &&
new_blkaddr == NEW_ADDR);
@@ -237,14 +249,19 @@ retry:
/* change address */
nat_set_blkaddr(e, new_blkaddr);
__set_nat_cache_dirty(nm_i, e);
+
+ /* update fsync_mark if its inode nat entry is still alive */
+ e = __lookup_nat_cache(nm_i, ni->ino);
+ if (e)
+ e->fsync_done = fsync_done;
write_unlock(&nm_i->nat_tree_lock);
}
-static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD)
+ if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -391,8 +408,8 @@ got:
/*
* Caller should call f2fs_put_dnode(dn).
- * Also, it should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op() only if ro is not set RDONLY_NODE.
+ * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
* In the case of RDONLY_NODE, we don't need to care about mutex.
*/
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
@@ -495,15 +512,15 @@ static void truncate_node(struct dnode_of_data *dn)
get_node_info(sbi, dn->nid, &ni);
if (dn->inode->i_blocks == 0) {
- BUG_ON(ni.blk_addr != NULL_ADDR);
+ f2fs_bug_on(ni.blk_addr != NULL_ADDR);
goto invalidate;
}
- BUG_ON(ni.blk_addr == NULL_ADDR);
+ f2fs_bug_on(ni.blk_addr == NULL_ADDR);
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
- dec_valid_node_count(sbi, dn->inode, 1);
- set_node_addr(sbi, &ni, NULL_ADDR);
+ dec_valid_node_count(sbi, dn->inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
@@ -516,6 +533,10 @@ invalidate:
F2FS_SET_SB_DIRT(sbi);
f2fs_put_page(dn->node_page, 1);
+
+ invalidate_mapping_pages(NODE_MAPPING(sbi),
+ dn->node_page->index, dn->node_page->index);
+
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}
@@ -631,19 +652,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
return 0;
/* get indirect nodes in the path */
- for (i = 0; i < depth - 1; i++) {
+ for (i = 0; i < idx + 1; i++) {
/* refernece count'll be increased */
pages[i] = get_node_page(sbi, nid[i]);
if (IS_ERR(pages[i])) {
- depth = i + 1;
err = PTR_ERR(pages[i]);
+ idx = i - 1;
goto fail;
}
nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
}
/* free direct nodes linked to a partial indirect node */
- for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
+ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
child_nid = get_nid(pages[idx], i, false);
if (!child_nid)
continue;
@@ -654,7 +675,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
set_nid(pages[idx], i, 0, false);
}
- if (offset[depth - 1] == 0) {
+ if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
truncate_node(dn);
@@ -662,9 +683,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
f2fs_put_page(pages[idx], 1);
}
offset[idx]++;
- offset[depth - 1] = 0;
+ offset[idx + 1] = 0;
+ idx--;
fail:
- for (i = depth - 3; i >= 0; i--)
+ for (i = idx; i >= 0; i--)
f2fs_put_page(pages[i], 1);
trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
@@ -678,11 +700,10 @@ fail:
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct address_space *node_mapping = sbi->node_inode->i_mapping;
int err = 0, cont = 1;
int level, offset[4], noffset[4];
unsigned int nofs = 0;
- struct f2fs_node *rn;
+ struct f2fs_inode *ri;
struct dnode_of_data dn;
struct page *page;
@@ -699,7 +720,7 @@ restart:
set_new_dnode(&dn, inode, page, NULL, 0);
unlock_page(page);
- rn = F2FS_NODE(page);
+ ri = F2FS_INODE(page);
switch (level) {
case 0:
case 1:
@@ -709,7 +730,7 @@ restart:
nofs = noffset[1];
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+ err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
nofs += 1 + NIDS_PER_BLOCK;
@@ -718,7 +739,7 @@ restart:
nofs = 5 + 2 * NIDS_PER_BLOCK;
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+ err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
break;
@@ -728,7 +749,7 @@ restart:
skip_partial:
while (cont) {
- dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
+ dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
switch (offset[0]) {
case NODE_DIR1_BLOCK:
case NODE_DIR2_BLOCK:
@@ -751,14 +772,14 @@ skip_partial:
if (err < 0 && err != -ENOENT)
goto fail;
if (offset[1] == 0 &&
- rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
- if (page->mapping != node_mapping) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto restart;
}
- wait_on_page_writeback(page);
- rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
+ f2fs_wait_on_page_writeback(page, NODE);
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
}
@@ -794,38 +815,34 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
set_new_dnode(&dn, inode, page, npage, nid);
if (page)
- dn.inode_page_locked = 1;
+ dn.inode_page_locked = true;
truncate_node(&dn);
return 0;
}
/*
- * Caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
*/
-int remove_inode_page(struct inode *inode)
+void remove_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *page;
nid_t ino = inode->i_ino;
struct dnode_of_data dn;
- int err;
page = get_node_page(sbi, ino);
if (IS_ERR(page))
- return PTR_ERR(page);
+ return;
- err = truncate_xattr_node(inode, page);
- if (err) {
+ if (truncate_xattr_node(inode, page)) {
f2fs_put_page(page, 1);
- return err;
+ return;
}
-
/* 0 is possible, after f2fs_new_inode() is failed */
- BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
+ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
set_new_dnode(&dn, inode, page, page, ino);
truncate_node(&dn);
- return 0;
}
struct page *new_inode_page(struct inode *inode, const struct qstr *name)
@@ -843,19 +860,18 @@ struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct node_info old_ni, new_ni;
struct page *page;
int err;
- if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
- page = grab_cache_page(mapping, dn->nid);
+ page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
if (!page)
return ERR_PTR(-ENOMEM);
- if (!inc_valid_node_count(sbi, dn->inode, 1)) {
+ if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
err = -ENOSPC;
goto fail;
}
@@ -863,17 +879,18 @@ struct page *new_node_page(struct dnode_of_data *dn,
get_node_info(sbi, dn->nid, &old_ni);
/* Reinitialize old_ni with new node page */
- BUG_ON(old_ni.blk_addr != NULL_ADDR);
+ f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
+ f2fs_wait_on_page_writeback(page, NODE);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
set_page_dirty(page);
- if (ofs == XATTR_NODE_OFFSET)
+ if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
dn->node_page = page;
@@ -898,14 +915,14 @@ fail:
* LOCKED_PAGE: f2fs_put_page(page, 1)
* error: nothing
*/
-static int read_node_page(struct page *page, int type)
+static int read_node_page(struct page *page, int rw)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
struct node_info ni;
get_node_info(sbi, page->index, &ni);
- if (ni.blk_addr == NULL_ADDR) {
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return -ENOENT;
}
@@ -913,7 +930,7 @@ static int read_node_page(struct page *page, int type)
if (PageUptodate(page))
return LOCKED_PAGE;
- return f2fs_readpage(sbi, page, ni.blk_addr, type);
+ return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
}
/*
@@ -921,18 +938,17 @@ static int read_node_page(struct page *page, int type)
*/
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *apage;
int err;
- apage = find_get_page(mapping, nid);
+ apage = find_get_page(NODE_MAPPING(sbi), nid);
if (apage && PageUptodate(apage)) {
f2fs_put_page(apage, 0);
return;
}
f2fs_put_page(apage, 0);
- apage = grab_cache_page(mapping, nid);
+ apage = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!apage)
return;
@@ -945,11 +961,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *page;
int err;
repeat:
- page = grab_cache_page(mapping, nid);
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -960,17 +975,15 @@ repeat:
goto got_it;
lock_page(page);
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
got_it:
- BUG_ON(nid != nid_of_node(page));
- mark_page_accessed(page);
return page;
}
@@ -981,7 +994,6 @@ got_it:
struct page *get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct blk_plug plug;
struct page *page;
int err, i, end;
@@ -992,7 +1004,7 @@ struct page *get_node_page_ra(struct page *parent, int start)
if (!nid)
return ERR_PTR(-ENOENT);
repeat:
- page = grab_cache_page(mapping, nid);
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -1017,16 +1029,15 @@ repeat:
blk_finish_plug(&plug);
lock_page(page);
- if (page->mapping != mapping) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
page_hit:
- if (!PageUptodate(page)) {
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- mark_page_accessed(page);
return page;
}
@@ -1048,7 +1059,6 @@ void sync_inode_page(struct dnode_of_data *dn)
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct writeback_control *wbc)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
pgoff_t index, end;
struct pagevec pvec;
int step = ino ? 2 : 0;
@@ -1062,7 +1072,7 @@ next_step:
while (index <= end) {
int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
@@ -1095,7 +1105,7 @@ next_step:
else if (!trylock_page(page))
continue;
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
unlock_page(page);
continue;
@@ -1122,7 +1132,7 @@ continue_unlock:
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
}
- mapping->a_ops->writepage(page, wbc);
+ NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
wrote++;
if (--wbc->nr_to_write == 0)
@@ -1143,11 +1153,52 @@ continue_unlock:
}
if (wrote)
- f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
-
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
return nwritten;
}
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ pgoff_t index = 0, end = LONG_MAX;
+ struct pagevec pvec;
+ int ret2 = 0, ret = 0;
+
+ pagevec_init(&pvec, 0);
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_WRITEBACK,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /* until radix tree lookup accepts end_index */
+ if (unlikely(page->index > end))
+ continue;
+
+ if (ino && ino_of_node(page) == ino) {
+ f2fs_wait_on_page_writeback(page, NODE);
+ if (TestClearPageError(page))
+ ret = -EIO;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+
+ if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -ENOSPC;
+ if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -EIO;
+ if (!ret)
+ ret = ret2;
+ return ret;
+}
+
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
@@ -1155,66 +1206,71 @@ static int f2fs_write_node_page(struct page *page,
nid_t nid;
block_t new_addr;
struct node_info ni;
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
+
+ trace_f2fs_writepage(page, NODE);
+
+ if (unlikely(sbi->por_doing))
+ goto redirty_out;
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, NODE);
/* get old block addr of this node page */
nid = nid_of_node(page);
- BUG_ON(page->index != nid);
+ f2fs_bug_on(page->index != nid);
get_node_info(sbi, nid, &ni);
/* This page is already truncated */
- if (ni.blk_addr == NULL_ADDR) {
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
return 0;
}
- if (wbc->for_reclaim) {
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
+ if (wbc->for_reclaim)
+ goto redirty_out;
mutex_lock(&sbi->node_write);
set_page_writeback(page);
- write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
- set_node_addr(sbi, &ni, new_addr);
+ write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
+ set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
mutex_unlock(&sbi->node_write);
unlock_page(page);
return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
-/*
- * It is very important to gather dirty pages and write at once, so that we can
- * submit a big bio without interfering other data writes.
- * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
- */
-#define COLLECT_DIRTY_NODES 1536
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- long nr_to_write = wbc->nr_to_write;
+ long diff;
- /* First check balancing cached NAT entries */
- if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
- f2fs_sync_fs(sbi->sb, true);
- return 0;
- }
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
+ /* balancing f2fs's metadata in background */
+ f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
- return 0;
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ goto skip_write;
- /* if mounting is failed, skip writing node pages */
- wbc->nr_to_write = 3 * max_hw_blocks(sbi);
+ diff = nr_pages_to_write(sbi, NODE, wbc);
+ wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
- wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
- wbc->nr_to_write);
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
return 0;
}
@@ -1223,6 +1279,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+ trace_f2fs_set_page_dirty(page, NODE);
+
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -1260,59 +1318,51 @@ const struct address_space_operations f2fs_node_aops = {
.releasepage = f2fs_release_node_page,
};
-static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
+static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
+ nid_t n)
{
- struct list_head *this;
- struct free_nid *i;
- list_for_each(this, head) {
- i = list_entry(this, struct free_nid, list);
- if (i->nid == n)
- return i;
- }
- return NULL;
+ return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static void __del_from_free_nid_list(struct free_nid *i)
+static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
+ struct free_nid *i)
{
list_del(&i->list);
- kmem_cache_free(free_nid_slab, i);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
bool allocated = false;
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+ if (!available_free_memory(sbi, FREE_NIDS))
return -1;
/* 0 nid should not be used */
- if (nid == 0)
+ if (unlikely(nid == 0))
return 0;
- if (!build)
- goto retry;
-
- /* do not add allocated nids */
- read_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
- allocated = true;
- read_unlock(&nm_i->nat_tree_lock);
- if (allocated)
- return 0;
-retry:
- i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
- if (!i) {
- cond_resched();
- goto retry;
+ if (build) {
+ /* do not add allocated nids */
+ read_lock(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne &&
+ (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
+ allocated = true;
+ read_unlock(&nm_i->nat_tree_lock);
+ if (allocated)
+ return 0;
}
+
+ i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
spin_lock(&nm_i->free_nid_list_lock);
- if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
return 0;
@@ -1326,18 +1376,25 @@ retry:
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct free_nid *i;
+ bool need_free = false;
+
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+ i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
- __del_from_free_nid_list(i);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ need_free = true;
}
spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
int i;
@@ -1346,13 +1403,13 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
- if (start_nid >= nm_i->max_nid)
+ if (unlikely(start_nid >= nm_i->max_nid))
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- BUG_ON(blk_addr == NEW_ADDR);
+ f2fs_bug_on(blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR) {
- if (add_free_nid(nm_i, start_nid, true) < 0)
+ if (add_free_nid(sbi, start_nid, true) < 0)
break;
}
}
@@ -1371,16 +1428,16 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
return;
/* readahead nat pages to be scanned */
- ra_nat_pages(sbi, nid);
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(nm_i, page, nid);
+ scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
- if (nid >= nm_i->max_nid)
+ if (unlikely(nid >= nm_i->max_nid))
nid = 0;
if (i++ == FREE_NID_PAGES)
@@ -1396,7 +1453,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
nid = le32_to_cpu(nid_in_journal(sum, i));
if (addr == NULL_ADDR)
- add_free_nid(nm_i, nid, true);
+ add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
@@ -1412,23 +1469,20 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
- struct list_head *this;
retry:
- if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
+ if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
return false;
spin_lock(&nm_i->free_nid_list_lock);
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->fcnt && !sbi->on_build_free_nids) {
- BUG_ON(list_empty(&nm_i->free_nid_list));
- list_for_each(this, &nm_i->free_nid_list) {
- i = list_entry(this, struct free_nid, list);
+ if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(list_empty(&nm_i->free_nid_list));
+ list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
break;
- }
- BUG_ON(i->state != NID_NEW);
+ f2fs_bug_on(i->state != NID_NEW);
*nid = i->nid;
i->state = NID_ALLOC;
nm_i->fcnt--;
@@ -1439,9 +1493,7 @@ retry:
/* Let's scan nat pages and its caches to get free nids */
mutex_lock(&nm_i->build_lock);
- sbi->on_build_free_nids = 1;
build_free_nids(sbi);
- sbi->on_build_free_nids = 0;
mutex_unlock(&nm_i->build_lock);
goto retry;
}
@@ -1455,10 +1507,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct free_nid *i;
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
- BUG_ON(!i || i->state != NID_ALLOC);
- __del_from_free_nid_list(i);
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(!i || i->state != NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
spin_unlock(&nm_i->free_nid_list_lock);
+
+ kmem_cache_free(free_nid_slab, i);
}
/*
@@ -1468,20 +1522,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
+ bool need_free = false;
if (!nid)
return;
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
- BUG_ON(!i || i->state != NID_ALLOC);
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
- __del_from_free_nid_list(i);
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(!i || i->state != NID_ALLOC);
+ if (!available_free_memory(sbi, FREE_NIDS)) {
+ __del_from_free_nid_list(nm_i, i);
+ need_free = true;
} else {
i->state = NID_NEW;
nm_i->fcnt++;
}
spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1489,90 +1548,200 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
block_t new_blkaddr)
{
rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
- set_node_addr(sbi, ni, new_blkaddr);
+ set_node_addr(sbi, ni, new_blkaddr, false);
clear_node_page_dirty(page);
}
+static void recover_inline_xattr(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ void *src_addr, *dst_addr;
+ size_t inline_size;
+ struct page *ipage;
+ struct f2fs_inode *ri;
+
+ if (!f2fs_has_inline_xattr(inode))
+ return;
+
+ if (!IS_INODE(page))
+ return;
+
+ ri = F2FS_INODE(page);
+ if (!(ri->i_inline & F2FS_INLINE_XATTR))
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+
+ dst_addr = inline_xattr_addr(ipage);
+ src_addr = inline_xattr_addr(page);
+ inline_size = inline_xattr_size(inode);
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ memcpy(dst_addr, src_addr, inline_size);
+
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
+ nid_t new_xnid = nid_of_node(page);
+ struct node_info ni;
+
+ recover_inline_xattr(inode, page);
+
+ if (!f2fs_has_xattr_block(ofs_of_node(page)))
+ return false;
+
+ /* 1: invalidate the previous xattr nid */
+ if (!prev_xnid)
+ goto recover_xnid;
+
+ /* Deallocate node address */
+ get_node_info(sbi, prev_xnid, &ni);
+ f2fs_bug_on(ni.blk_addr == NULL_ADDR);
+ invalidate_blocks(sbi, ni.blk_addr);
+ dec_valid_node_count(sbi, inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
+
+recover_xnid:
+ /* 2: allocate new xattr nid */
+ if (unlikely(!inc_valid_node_count(sbi, inode)))
+ f2fs_bug_on(1);
+
+ remove_free_nid(NM_I(sbi), new_xnid);
+ get_node_info(sbi, new_xnid, &ni);
+ ni.ino = inode->i_ino;
+ set_node_addr(sbi, &ni, NEW_ADDR, false);
+ F2FS_I(inode)->i_xattr_nid = new_xnid;
+
+ /* 3: update xattr blkaddr */
+ refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
+ set_node_addr(sbi, &ni, blkaddr, false);
+
+ update_inode_page(inode);
+ return true;
+}
+
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
- struct f2fs_node *src, *dst;
+ struct f2fs_inode *src, *dst;
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
- ipage = grab_cache_page(mapping, ino);
+ get_node_info(sbi, ino, &old_ni);
+
+ if (unlikely(old_ni.blk_addr != NULL_ADDR))
+ return -EINVAL;
+
+ ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
if (!ipage)
return -ENOMEM;
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
- get_node_info(sbi, ino, &old_ni);
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
- src = F2FS_NODE(page);
- dst = F2FS_NODE(ipage);
+ src = F2FS_INODE(page);
+ dst = F2FS_INODE(ipage);
- memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
- dst->i.i_size = 0;
- dst->i.i_blocks = cpu_to_le64(1);
- dst->i.i_links = cpu_to_le32(1);
- dst->i.i_xattr_nid = 0;
+ memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
+ dst->i_size = 0;
+ dst->i_blocks = cpu_to_le64(1);
+ dst->i_links = cpu_to_le32(1);
+ dst->i_xattr_nid = 0;
new_ni = old_ni;
new_ni.ino = ino;
- if (!inc_valid_node_count(sbi, NULL, 1))
+ if (unlikely(!inc_valid_node_count(sbi, NULL)))
WARN_ON(1);
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
f2fs_put_page(ipage, 1);
return 0;
}
+/*
+ * ra_sum_pages() merge contiguous pages into one bio and submit.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
+ */
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
+ int start, int nrpages)
+{
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
+ struct address_space *mapping = inode->i_mapping;
+ int i, page_idx = start;
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+ /* alloc page in bd_inode for reading node summary info */
+ pages[i] = grab_cache_page(mapping, page_idx);
+ if (!pages[i])
+ break;
+ f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
+ }
+
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return i;
+}
+
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
- struct page *page;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
- int i, last_offset;
-
- /* alloc temporal page for read node */
- page = alloc_page(GFP_NOFS | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
- lock_page(page);
+ int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+ struct page *pages[bio_blocks];
+ int i, idx, last_offset, nrpages, err = 0;
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
- for (i = 0; i < last_offset; i++, sum_entry++) {
- /*
- * In order to read next node page,
- * we must clear PageUptodate flag.
- */
- ClearPageUptodate(page);
+ for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
+ nrpages = min(last_offset - i, bio_blocks);
- if (f2fs_readpage(sbi, page, addr, READ_SYNC))
- goto out;
+ /* read ahead node pages */
+ nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
+ if (!nrpages)
+ return -ENOMEM;
+
+ for (idx = 0; idx < nrpages; idx++) {
+ if (err)
+ goto skip;
+
+ lock_page(pages[idx]);
+ if (unlikely(!PageUptodate(pages[idx]))) {
+ err = -EIO;
+ } else {
+ rn = F2FS_NODE(pages[idx]);
+ sum_entry->nid = rn->footer.nid;
+ sum_entry->version = 0;
+ sum_entry->ofs_in_node = 0;
+ sum_entry++;
+ }
+ unlock_page(pages[idx]);
+skip:
+ page_cache_release(pages[idx]);
+ }
- lock_page(page);
- rn = F2FS_NODE(page);
- sum_entry->nid = rn->footer.nid;
- sum_entry->version = 0;
- sum_entry->ofs_in_node = 0;
- addr++;
+ invalidate_mapping_pages(inode->i_mapping, addr,
+ addr + nrpages);
}
- unlock_page(page);
-out:
- __free_pages(page, 0);
- return 0;
+ return err;
}
static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -1608,9 +1777,7 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
- nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
- nat_set_version(ne, raw_ne.version);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
@@ -1627,7 +1794,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct list_head *cur, *n;
+ struct nat_entry *ne, *cur;
struct page *page = NULL;
struct f2fs_nat_block *nat_blk = NULL;
nid_t start_nid = 0, end_nid = 0;
@@ -1639,18 +1806,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
mutex_lock(&curseg->curseg_mutex);
/* 1) flush dirty nat caches */
- list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
- struct nat_entry *ne;
+ list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
- block_t new_blkaddr;
-
- ne = list_entry(cur, struct nat_entry, list);
- nid = nat_get_nid(ne);
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
+
+ nid = nat_get_nid(ne);
+
if (flushed)
goto to_nat_page;
@@ -1677,14 +1842,10 @@ to_nat_page:
nat_blk = page_address(page);
}
- BUG_ON(!nat_blk);
+ f2fs_bug_on(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
flush_now:
- new_blkaddr = nat_get_blkaddr(ne);
-
- raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
- raw_ne.block_addr = cpu_to_le32(new_blkaddr);
- raw_ne.version = nat_get_version(ne);
+ raw_nat_from_node_info(&raw_ne, &ne->ni);
if (offset < 0) {
nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1694,23 +1855,19 @@ flush_now:
}
if (nat_get_blkaddr(ne) == NULL_ADDR &&
- add_free_nid(NM_I(sbi), nid, false) <= 0) {
+ add_free_nid(sbi, nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
} else {
write_lock(&nm_i->nat_tree_lock);
__clear_nat_cache_dirty(nm_i, ne);
- ne->checkpointed = true;
write_unlock(&nm_i->nat_tree_lock);
}
}
if (!flushed)
mutex_unlock(&curseg->curseg_mutex);
f2fs_put_page(page, 1);
-
- /* 2) shrink nat caches if necessary */
- try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1725,10 +1882,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
+ /* not used nids: 0, node, meta, (and root counted as valid node) */
+ nm_i->available_nids = nm_i->max_nid - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
+ nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+ INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1781,11 +1944,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy free nid list */
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
- BUG_ON(i->state == NID_ALLOC);
- __del_from_free_nid_list(i);
+ f2fs_bug_on(i->state == NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ kmem_cache_free(free_nid_slab, i);
+ spin_lock(&nm_i->free_nid_list_lock);
}
- BUG_ON(nm_i->fcnt);
+ f2fs_bug_on(nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
@@ -1793,13 +1959,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
- for (idx = 0; idx < found; idx++) {
- struct nat_entry *e = natvec[idx];
- nid = nat_get_nid(e) + 1;
- __del_from_nat_cache(nm_i, e);
- }
+ nid = nat_get_nid(natvec[found - 1]) + 1;
+ for (idx = 0; idx < found; idx++)
+ __del_from_nat_cache(nm_i, natvec[idx]);
}
- BUG_ON(nm_i->nat_cnt);
+ f2fs_bug_on(nm_i->nat_cnt);
write_unlock(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
@@ -1810,12 +1974,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
int __init create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
- sizeof(struct nat_entry), NULL);
+ sizeof(struct nat_entry));
if (!nat_entry_slab)
return -ENOMEM;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
- sizeof(struct free_nid), NULL);
+ sizeof(struct free_nid));
if (!free_nid_slab) {
kmem_cache_destroy(nat_entry_slab);
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 3496bb3e15d..7281112cd1c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
/* # of pages to perform readahead before building free nids */
#define FREE_NID_PAGES 4
-/* maximum # of free node ids to produce during build_free_nids */
-#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
-
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
-/* maximum cached nat entries to manage memory footprint */
-#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
+/* control the memory footprint threshold (10MB per 1GB ram) */
+#define DEF_RAM_THRESHOLD 10
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
struct nat_entry {
struct list_head list; /* for clean or dirty nat list */
bool checkpointed; /* whether it is checkpointed or not */
+ bool fsync_done; /* whether the latest node has fsync mark */
struct node_info ni; /* in-memory node information */
};
@@ -58,9 +56,15 @@ struct nat_entry {
#define nat_set_version(nat, v) (nat->ni.version = v)
#define __set_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
+ do { \
+ ne->checkpointed = false; \
+ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
+ } while (0)
#define __clear_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->nat_entries);
+ do { \
+ ne->checkpointed = true; \
+ list_move_tail(&ne->list, &nm_i->nat_entries); \
+ } while (0)
#define inc_node_version(version) (++version)
static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,20 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
ni->version = raw_ne->version;
}
+static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
+ struct node_info *ni)
+{
+ raw_ne->ino = cpu_to_le32(ni->ino);
+ raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
+ raw_ne->version = ni->version;
+}
+
+enum mem_type {
+ FREE_NIDS, /* indicates the free nid list */
+ NAT_ENTRIES, /* indicates the cached nat entry */
+ DIRTY_DENTS /* indicates dirty dentry pages */
+};
+
/*
* For free nid mangement
*/
@@ -224,13 +242,19 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)
* | `- direct node (5 + N => 5 + 2N - 1)
* `- double indirect node (5 + 2N)
* `- indirect node (6 + 2N)
- * `- direct node (x(N + 1))
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + x(N + 1))
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + (N - 1)(N + 1))
+ * `- direct node
*/
static inline bool IS_DNODE(struct page *node_page)
{
unsigned int ofs = ofs_of_node(node_page);
- if (ofs == XATTR_NODE_OFFSET)
+ if (f2fs_has_xattr_block(ofs))
return false;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
@@ -248,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
- wait_on_page_writeback(p);
+ f2fs_wait_on_page_writeback(p, NODE);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 51ef5eec33d..a112368a4a8 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,21 +27,18 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
nid_t ino)
{
- struct list_head *this;
struct fsync_inode_entry *entry;
- list_for_each(this, head) {
- entry = list_entry(this, struct fsync_inode_entry, list);
+ list_for_each_entry(entry, head, list)
if (entry->inode->i_ino == ino)
return entry;
- }
+
return NULL;
}
static int recover_dentry(struct page *ipage, struct inode *inode)
{
- struct f2fs_node *raw_node = F2FS_NODE(ipage);
- struct f2fs_inode *raw_inode = &(raw_node->i);
+ struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct qstr name;
@@ -49,51 +46,71 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
struct inode *dir, *einode;
int err = 0;
- dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino);
- if (!dir) {
- dir = f2fs_iget(inode->i_sb, pino);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto out;
- }
- set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
- add_dirty_dir_inode(dir);
+ dir = f2fs_iget(inode->i_sb, pino);
+ if (IS_ERR(dir)) {
+ err = PTR_ERR(dir);
+ goto out;
}
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
+
+ if (unlikely(name.len > F2FS_NAME_LEN)) {
+ WARN_ON(1);
+ err = -ENAMETOOLONG;
+ goto out_err;
+ }
retry:
de = f2fs_find_entry(dir, &name, &page);
- if (de && inode->i_ino == le32_to_cpu(de->ino)) {
- kunmap(page);
- f2fs_put_page(page, 0);
- goto out;
- }
+ if (de && inode->i_ino == le32_to_cpu(de->ino))
+ goto out_unmap_put;
if (de) {
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
WARN_ON(1);
- if (PTR_ERR(einode) == -ENOENT)
+ err = PTR_ERR(einode);
+ if (err == -ENOENT)
err = -EEXIST;
- goto out;
+ goto out_unmap_put;
+ }
+ err = acquire_orphan_inode(F2FS_SB(inode->i_sb));
+ if (err) {
+ iput(einode);
+ goto out_unmap_put;
}
f2fs_delete_entry(de, page, einode);
iput(einode);
goto retry;
}
err = __f2fs_add_link(dir, &name, inode);
+ if (err)
+ goto out_err;
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
+ iput(dir);
+ } else {
+ add_dirty_dir_inode(dir);
+ set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
+ }
+
+ goto out;
+
+out_unmap_put:
+ kunmap(page);
+ f2fs_put_page(page, 0);
+out_err:
+ iput(dir);
out:
- f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
- "ino = %x, name = %s, dir = %lx, err = %d",
- ino_of_node(ipage), raw_inode->i_name,
+ f2fs_msg(inode->i_sb, KERN_NOTICE,
+ "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), raw_inode->i_name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
static int recover_inode(struct inode *inode, struct page *node_page)
{
- struct f2fs_node *raw_node = F2FS_NODE(node_page);
- struct f2fs_inode *raw_inode = &(raw_node->i);
+ struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
if (!IS_INODE(node_page))
return 0;
@@ -125,7 +142,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
page = alloc_page(GFP_F2FS_ZERO);
@@ -136,9 +153,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
while (1) {
struct fsync_inode_entry *entry;
- err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
+ err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
if (err)
- goto out;
+ return err;
lock_page(page);
@@ -184,9 +201,10 @@ next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
}
+
unlock_page(page);
-out:
__free_pages(page, 0);
+
return err;
}
@@ -206,13 +224,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
{
struct seg_entry *sentry;
unsigned int segno = GET_SEGNO(sbi, blkaddr);
- unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
- (sbi->blocks_per_seg - 1);
+ unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
+ struct page *sum_page, *node_page;
nid_t ino, nid;
- void *kaddr;
struct inode *inode;
- struct page *node_page;
unsigned int offset;
block_t bidx;
int i;
@@ -226,18 +243,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
- break;
+ goto got_it;
}
}
- if (i > CURSEG_COLD_DATA) {
- struct page *sum_page = get_sum_page(sbi, segno);
- struct f2fs_summary_block *sum_node;
- kaddr = page_address(sum_page);
- sum_node = (struct f2fs_summary_block *)kaddr;
- sum = sum_node->entries[blkoff];
- f2fs_put_page(sum_page, 1);
- }
+ sum_page = get_sum_page(sbi, segno);
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ sum = sum_node->entries[blkoff];
+ f2fs_put_page(sum_page, 1);
+got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
if (dn->inode->i_ino == nid) {
@@ -285,28 +299,31 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct f2fs_summary sum;
struct node_info ni;
int err = 0, recovered = 0;
- int ilock;
+
+ if (recover_inline_data(inode, page))
+ goto out;
+
+ if (recover_xattr_data(inode, page, blkaddr))
+ goto out;
start = start_bidx_of_node(ofs_of_node(page), fi);
- if (IS_INODE(page))
- end = start + ADDRS_PER_INODE(fi);
- else
- end = start + ADDRS_PER_BLOCK;
+ end = start + ADDRS_PER_PAGE(page, fi);
+
+ f2fs_lock_op(sbi);
- ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
- mutex_unlock_op(sbi, ilock);
- return err;
+ f2fs_unlock_op(sbi);
+ goto out;
}
- wait_on_page_writeback(dn.node_page);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE);
get_node_info(sbi, dn.nid, &ni);
- BUG_ON(ni.ino != ino_of_node(page));
- BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
+ f2fs_bug_on(ni.ino != ino_of_node(page));
+ f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));
for (; start < end; start++) {
block_t src, dest;
@@ -316,9 +333,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
if (src == NULL_ADDR) {
- int err = reserve_new_block(&dn);
+ err = reserve_new_block(&dn);
/* We should not get -ENOSPC */
- BUG_ON(err);
+ f2fs_bug_on(err);
}
/* Check the previous node page having this index */
@@ -349,11 +366,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
err:
f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, ilock);
-
- f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
- "recovered_data = %d blocks, err = %d",
- inode->i_ino, recovered, err);
+ f2fs_unlock_op(sbi);
+out:
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "recover_data: ino = %lx, recovered = %d blocks, err = %d",
+ inode->i_ino, recovered, err);
return err;
}
@@ -371,7 +388,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
- page = alloc_page(GFP_NOFS | __GFP_ZERO);
+ page = alloc_page(GFP_F2FS_ZERO);
if (!page)
return -ENOMEM;
@@ -380,9 +397,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
while (1) {
struct fsync_inode_entry *entry;
- err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
+ err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
if (err)
- goto out;
+ return err;
lock_page(page);
@@ -406,8 +423,8 @@ next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
}
+
unlock_page(page);
-out:
__free_pages(page, 0);
if (!err)
@@ -419,16 +436,17 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
{
struct list_head inode_list;
int err;
+ bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
- sizeof(struct fsync_inode_entry), NULL);
- if (unlikely(!fsync_entry_slab))
+ sizeof(struct fsync_inode_entry));
+ if (!fsync_entry_slab)
return -ENOMEM;
INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
- sbi->por_doing = 1;
+ sbi->por_doing = true;
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -436,14 +454,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
if (list_empty(&inode_list))
goto out;
+ need_writecp = true;
+
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
- BUG_ON(!list_empty(&inode_list));
+ f2fs_bug_on(!list_empty(&inode_list));
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
- sbi->por_doing = 0;
- if (!err)
+ sbi->por_doing = false;
+ if (!err && need_writecp)
write_checkpoint(sbi, false);
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 09af9c7b0f5..d04613df710 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,13 +13,165 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
+#include <linux/kthread.h>
#include <linux/vmalloc.h>
+#include <linux/swap.h>
#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include <trace/events/f2fs.h>
+#define __reverse_ffz(x) __reverse_ffs(~(x))
+
+static struct kmem_cache *discard_entry_slab;
+
+/*
+ * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
+ * MSB and LSB are reversed in a byte by f2fs_set_bit.
+ */
+static inline unsigned long __reverse_ffs(unsigned long word)
+{
+ int num = 0;
+
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf0) == 0)
+ num += 4;
+ else
+ word >>= 4;
+ if ((word & 0xc) == 0)
+ num += 2;
+ else
+ word >>= 2;
+ if ((word & 0x2) == 0)
+ num += 1;
+ return num;
+}
+
+/*
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
+ * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * Example:
+ * LSB <--> MSB
+ * f2fs_set_bit(0, bitmap) => 0000 0001
+ * f2fs_set_bit(7, bitmap) => 1000 0000
+ */
+static unsigned long __find_rev_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~0UL << quot;
+ submask = (unsigned char)(0xff << rest) >> rest;
+ submask <<= quot;
+ mask &= submask;
+ tmp &= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG-1)) {
+ tmp = *(p++);
+ if (tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffs(tmp);
+}
+
+static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~(~0UL << quot);
+ submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
+ submask <<= quot;
+ mask += submask;
+ tmp |= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG - 1)) {
+ tmp = *(p++);
+ if (~tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffz(tmp);
+}
+
/*
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
@@ -36,6 +188,114 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
}
}
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+{
+ /* check the # of cached NAT entries and prefree segments */
+ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
+ excess_prefree_segs(sbi))
+ f2fs_sync_fs(sbi->sb, true);
+}
+
+static int issue_flush_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ wait_queue_head_t *q = &fcc->flush_wait_queue;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list) {
+ fcc->dispatch_list = fcc->issue_list;
+ fcc->issue_list = fcc->issue_tail = NULL;
+ }
+ spin_unlock(&fcc->issue_lock);
+
+ if (fcc->dispatch_list) {
+ struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct flush_cmd *cmd, *next;
+ int ret;
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ ret = submit_bio_wait(WRITE_FLUSH, bio);
+
+ for (cmd = fcc->dispatch_list; cmd; cmd = next) {
+ cmd->ret = ret;
+ next = cmd->next;
+ complete(&cmd->wait);
+ }
+ bio_put(bio);
+ fcc->dispatch_list = NULL;
+ }
+
+ wait_event_interruptible(*q,
+ kthread_should_stop() || fcc->issue_list);
+ goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd cmd;
+
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+ init_completion(&cmd.wait);
+ cmd.next = NULL;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list)
+ fcc->issue_tail->next = &cmd;
+ else
+ fcc->issue_list = &cmd;
+ fcc->issue_tail = &cmd;
+ spin_unlock(&fcc->issue_lock);
+
+ if (!fcc->dispatch_list)
+ wake_up(&fcc->flush_wait_queue);
+
+ wait_for_completion(&cmd.wait);
+
+ return cmd.ret;
+}
+
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct flush_cmd_control *fcc;
+ int err = 0;
+
+ fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+ if (!fcc)
+ return -ENOMEM;
+ spin_lock_init(&fcc->issue_lock);
+ init_waitqueue_head(&fcc->flush_wait_queue);
+ sbi->sm_info->cmd_control_info = fcc;
+ fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(fcc->f2fs_issue_flush)) {
+ err = PTR_ERR(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc =
+ sbi->sm_info->cmd_control_info;
+
+ if (fcc && fcc->f2fs_issue_flush)
+ kthread_stop(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -50,20 +310,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
- enum dirty_type t = DIRTY_HOT_DATA;
+ enum dirty_type t = sentry->type;
- dirty_type = sentry->type;
-
- if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
- dirty_i->nr_dirty[dirty_type]++;
-
- /* Only one bitmap should be set */
- for (; t <= DIRTY_COLD_NODE; t++) {
- if (t == dirty_type)
- continue;
- if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
- dirty_i->nr_dirty[t]--;
- }
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]++;
}
}
@@ -76,12 +326,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
dirty_i->nr_dirty[dirty_type]--;
if (dirty_type == DIRTY) {
- enum dirty_type t = DIRTY_HOT_DATA;
+ struct seg_entry *sentry = get_seg_entry(sbi, segno);
+ enum dirty_type t = sentry->type;
- /* clear all the bitmaps */
- for (; t <= DIRTY_COLD_NODE; t++)
- if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
- dirty_i->nr_dirty[t]--;
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]--;
if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
clear_bit(GET_SECNO(sbi, segno),
@@ -119,6 +368,69 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
+static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
+ block_t blkstart, block_t blklen)
+{
+ sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
+ sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
+ trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+ return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+}
+
+void discard_next_dnode(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ if (f2fs_issue_discard(sbi, blkaddr, 1)) {
+ struct page *page = grab_meta_page(sbi, blkaddr);
+ /* zero-filled page */
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct seg_entry *se)
+{
+ struct list_head *head = &SM_I(sbi)->discard_list;
+ struct discard_entry *new;
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ int max_blocks = sbi->blocks_per_seg;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long dmap[entries];
+ unsigned int start = 0, end = -1;
+ int i;
+
+ if (!test_opt(sbi, DISCARD))
+ return;
+
+ /* zero block will be discarded through the prefree list */
+ if (!se->valid_blocks || se->valid_blocks == max_blocks)
+ return;
+
+ /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
+ for (i = 0; i < entries; i++)
+ dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+
+ while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+ start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+ if (start >= max_blocks)
+ break;
+
+ end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+
+ new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&new->list);
+ new->blkaddr = START_BLOCK(sbi, segno) + start;
+ new->len = end - start;
+
+ list_add_tail(&new->list, head);
+ SM_I(sbi)->nr_discards += end - start;
+ }
+}
+
/*
* Should call clear_prefree_segments after checkpoint is done.
*/
@@ -141,30 +453,42 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
+ struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno = -1;
+ unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int total_segs = TOTAL_SEGS(sbi);
+ unsigned int start = 0, end = -1;
mutex_lock(&dirty_i->seglist_lock);
+
while (1) {
- segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- segno + 1);
- if (segno >= total_segs)
+ int i;
+ start = find_next_bit(prefree_map, total_segs, end + 1);
+ if (start >= total_segs)
break;
+ end = find_next_zero_bit(prefree_map, total_segs, start + 1);
+
+ for (i = start; i < end; i++)
+ clear_bit(i, prefree_map);
+
+ dirty_i->nr_dirty[PRE] -= end - start;
- if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
- dirty_i->nr_dirty[PRE]--;
-
- /* Let's use trim */
- if (test_opt(sbi, DISCARD))
- blkdev_issue_discard(sbi->sb->s_bdev,
- START_BLOCK(sbi, segno) <<
- sbi->log_sectors_per_block,
- 1 << (sbi->log_sectors_per_block +
- sbi->log_blocks_per_seg),
- GFP_NOFS, 0);
+ if (!test_opt(sbi, DISCARD))
+ continue;
+
+ f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
+ (end - start) << sbi->log_blocks_per_seg);
}
mutex_unlock(&dirty_i->seglist_lock);
+
+ /* send small discards */
+ list_for_each_entry_safe(entry, this, head, list) {
+ f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+ list_del(&entry->list);
+ SM_I(sbi)->nr_discards -= entry->len;
+ kmem_cache_free(discard_entry_slab, entry);
+ }
}
static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -193,9 +517,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
se = get_seg_entry(sbi, segno);
new_vblocks = se->valid_blocks + del;
- offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
- BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
+ f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
(new_vblocks > sbi->blocks_per_seg)));
se->valid_blocks = new_vblocks;
@@ -222,12 +546,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-static void refresh_sit_entry(struct f2fs_sb_info *sbi,
- block_t old_blkaddr, block_t new_blkaddr)
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
- update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
- update_sit_entry(sbi, old_blkaddr, -1);
+ update_sit_entry(sbi, new, 1);
+ if (GET_SEGNO(sbi, old) != NULL_SEGNO)
+ update_sit_entry(sbi, old, -1);
+
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -235,7 +561,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
unsigned int segno = GET_SEGNO(sbi, addr);
struct sit_info *sit_i = SIT_I(sbi);
- BUG_ON(addr == NULL_ADDR);
+ f2fs_bug_on(addr == NULL_ADDR);
if (addr == NEW_ADDR)
return;
@@ -267,9 +593,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
*/
int npages_for_summary_flush(struct f2fs_sb_info *sbi)
{
- int total_size_bytes = 0;
int valid_sum_count = 0;
- int i, sum_space;
+ int i, sum_in_page;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
if (sbi->ckpt->alloc_type[i] == SSR)
@@ -278,13 +603,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
valid_sum_count += curseg_blkoff(sbi, i);
}
- total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
- + sizeof(struct nat_journal) + 2
- + sizeof(struct sit_journal) + 2;
- sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
- if (total_size_bytes < sum_space)
+ sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+ SUM_FOOTER_SIZE) / SUMMARY_SIZE;
+ if (valid_sum_count <= sum_in_page)
return 1;
- else if (total_size_bytes < 2 * sum_space)
+ else if ((valid_sum_count - sum_in_page) <=
+ (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
return 2;
return 3;
}
@@ -350,7 +674,7 @@ find_other_zone:
if (dir == ALLOC_RIGHT) {
secno = find_next_zero_bit(free_i->free_secmap,
TOTAL_SECS(sbi), 0);
- BUG_ON(secno >= TOTAL_SECS(sbi));
+ f2fs_bug_on(secno >= TOTAL_SECS(sbi));
} else {
go_left = 1;
left_start = hint - 1;
@@ -366,7 +690,7 @@ find_other_zone:
}
left_start = find_next_zero_bit(free_i->free_secmap,
TOTAL_SECS(sbi), 0);
- BUG_ON(left_start >= TOTAL_SECS(sbi));
+ f2fs_bug_on(left_start >= TOTAL_SECS(sbi));
break;
}
secno = left_start;
@@ -405,7 +729,7 @@ skip_left:
}
got_it:
/* set it as dirty segment in free segmap */
- BUG_ON(test_bit(segno, free_i->free_segmap));
+ f2fs_bug_on(test_bit(segno, free_i->free_segmap));
__set_inuse(sbi, segno);
*newseg = segno;
write_unlock(&free_i->segmap_lock);
@@ -458,13 +782,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg, block_t start)
{
struct seg_entry *se = get_seg_entry(sbi, seg->segno);
- block_t ofs;
- for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
- if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
- && !f2fs_test_bit(ofs, se->cur_valid_map))
- break;
- }
- seg->next_blkoff = ofs;
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ unsigned long target_map[entries];
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ int i, pos;
+
+ for (i = 0; i < entries; i++)
+ target_map[i] = ckpt_map[i] | cur_map[i];
+
+ pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+
+ seg->next_blkoff = pos;
}
/*
@@ -550,9 +879,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->segment_count[curseg->alloc_type]++;
-#endif
+
+ stat_inc_seg_type(sbi, curseg);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -573,141 +901,6 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
-static void f2fs_end_io_write(struct bio *bio, int err)
-{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_private *p = bio->bi_private;
-
- do {
- struct page *page = bvec->bv_page;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
- if (!uptodate) {
- SetPageError(page);
- if (page->mapping)
- set_bit(AS_EIO, &page->mapping->flags);
- set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
- p->sbi->sb->s_flags |= MS_RDONLY;
- }
- end_page_writeback(page);
- dec_page_count(p->sbi, F2FS_WRITEBACK);
- } while (bvec >= bio->bi_io_vec);
-
- if (p->is_sync)
- complete(p->wait);
- kfree(p);
- bio_put(bio);
-}
-
-struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
-{
- struct bio *bio;
-
- /* No failure on bio allocation */
- bio = bio_alloc(GFP_NOIO, npages);
- bio->bi_bdev = bdev;
- bio->bi_private = NULL;
-
- return bio;
-}
-
-static void do_submit_bio(struct f2fs_sb_info *sbi,
- enum page_type type, bool sync)
-{
- int rw = sync ? WRITE_SYNC : WRITE;
- enum page_type btype = type > META ? META : type;
-
- if (type >= META_FLUSH)
- rw = WRITE_FLUSH_FUA;
-
- if (btype == META)
- rw |= REQ_META;
-
- if (sbi->bio[btype]) {
- struct bio_private *p = sbi->bio[btype]->bi_private;
- p->sbi = sbi;
- sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
-
- trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
-
- if (type == META_FLUSH) {
- DECLARE_COMPLETION_ONSTACK(wait);
- p->is_sync = true;
- p->wait = &wait;
- submit_bio(rw, sbi->bio[btype]);
- wait_for_completion(&wait);
- } else {
- p->is_sync = false;
- submit_bio(rw, sbi->bio[btype]);
- }
- sbi->bio[btype] = NULL;
- }
-}
-
-void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
-{
- down_write(&sbi->bio_sem);
- do_submit_bio(sbi, type, sync);
- up_write(&sbi->bio_sem);
-}
-
-static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, enum page_type type)
-{
- struct block_device *bdev = sbi->sb->s_bdev;
-
- verify_block_addr(sbi, blk_addr);
-
- down_write(&sbi->bio_sem);
-
- inc_page_count(sbi, F2FS_WRITEBACK);
-
- if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
- do_submit_bio(sbi, type, false);
-alloc_new:
- if (sbi->bio[type] == NULL) {
- struct bio_private *priv;
-retry:
- priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
- if (!priv) {
- cond_resched();
- goto retry;
- }
-
- sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi));
- sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
- sbi->bio[type]->bi_private = priv;
- /*
- * The end_io will be assigned at the sumbission phase.
- * Until then, let bio_add_page() merge consecutive IOs as much
- * as possible.
- */
- }
-
- if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
- do_submit_bio(sbi, type, false);
- goto alloc_new;
- }
-
- sbi->last_block_in_bio[type] = blk_addr;
-
- up_write(&sbi->bio_sem);
- trace_f2fs_submit_write_page(page, blk_addr, type);
-}
-
-void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool sync)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
- if (PageWriteback(page)) {
- f2fs_submit_bio(sbi, type, sync);
- wait_on_page_writeback(page);
- }
-}
-
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -771,20 +964,18 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
return __get_segment_type_4(page, p_type);
}
/* NR_CURSEG_TYPE(6) logs by default */
- BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
+ f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);
return __get_segment_type_6(page, p_type);
}
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t old_blkaddr, block_t *new_blkaddr,
- struct f2fs_summary *sum, enum page_type p_type)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
unsigned int old_cursegno;
- int type;
- type = __get_segment_type(page, p_type);
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
@@ -801,66 +992,78 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
mutex_lock(&sit_i->sentry_lock);
__refresh_next_blkoff(sbi, curseg);
-#ifdef CONFIG_F2FS_STAT_FS
- sbi->block_count[curseg->alloc_type]++;
-#endif
+ stat_inc_block_count(sbi, curseg);
+
+ if (!__has_curseg_space(sbi, type))
+ sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
-
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
mutex_unlock(&sit_i->sentry_lock);
- if (p_type == NODE)
+ if (page && IS_NODESEG(type))
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
- /* writeout dirty page into bdev */
- submit_write_page(sbi, page, *new_blkaddr, p_type);
-
mutex_unlock(&curseg->curseg_mutex);
}
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+ int type = __get_segment_type(page, fio->type);
+
+ allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
+ /* writeout dirty page into bdev */
+ f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
+}
+
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = WRITE_SYNC | REQ_META | REQ_PRIO
+ };
+
set_page_writeback(page);
- submit_write_page(sbi, page, page->index, META);
+ f2fs_submit_page_mbio(sbi, page, page->index, &fio);
}
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+ struct f2fs_io_info *fio,
unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
struct f2fs_summary sum;
set_summary(&sum, nid, 0, 0);
- do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
+ do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
}
-void write_data_page(struct inode *inode, struct page *page,
- struct dnode_of_data *dn, block_t old_blkaddr,
- block_t *new_blkaddr)
+void write_data_page(struct page *page, struct dnode_of_data *dn,
+ block_t *new_blkaddr, struct f2fs_io_info *fio)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct f2fs_summary sum;
struct node_info ni;
- BUG_ON(old_blkaddr == NULL_ADDR);
+ f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- do_write_page(sbi, page, old_blkaddr,
- new_blkaddr, &sum, DATA);
+ do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
}
-void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t old_blk_addr)
+void rewrite_data_page(struct page *page, block_t old_blkaddr,
+ struct f2fs_io_info *fio)
{
- submit_write_page(sbi, page, old_blk_addr, DATA);
+ struct inode *inode = page->mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
}
void recover_data_page(struct f2fs_sb_info *sbi,
@@ -896,14 +1099,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
@@ -919,6 +1119,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
unsigned int segno, old_cursegno;
block_t next_blkaddr = next_blkaddr_of_node(page);
unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = WRITE_SYNC,
+ };
curseg = CURSEG_I(sbi, type);
@@ -933,8 +1137,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
/* change the current log to the next block addr in advance */
@@ -942,22 +1145,54 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = next_segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
/* rewrite node page */
set_page_writeback(page);
- submit_write_page(sbi, page, new_blkaddr, NODE);
- f2fs_submit_bio(sbi, NODE, true);
+ f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
}
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+ struct page *page, enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ struct bio_vec *bvec;
+ int i;
+
+ down_read(&io->io_rwsem);
+ if (!io->bio)
+ goto out;
+
+ bio_for_each_segment_all(bvec, io->bio, i) {
+ if (page == bvec->bv_page) {
+ up_read(&io->io_rwsem);
+ return true;
+ }
+ }
+
+out:
+ up_read(&io->io_rwsem);
+ return false;
+}
+
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+ if (PageWriteback(page)) {
+ if (is_merged_page(sbi, page, type))
+ f2fs_submit_merged_bio(sbi, type, WRITE);
+ wait_on_page_writeback(page);
+ }
+}
+
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1062,9 +1297,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- if (restore_node_summary(sbi, segno, sum)) {
+ int err;
+
+ err = restore_node_summary(sbi, segno, sum);
+ if (err) {
f2fs_put_page(new, 1);
- return -EINVAL;
+ return err;
}
}
}
@@ -1085,6 +1323,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
int type = CURSEG_HOT_DATA;
+ int err;
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
/* restore for compacted data summary */
@@ -1093,9 +1332,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
type = CURSEG_HOT_NODE;
}
- for (; type <= CURSEG_COLD_NODE; type++)
- if (read_normal_summaries(sbi, type))
- return -EINVAL;
+ for (; type <= CURSEG_COLD_NODE; type++) {
+ err = read_normal_summaries(sbi, type);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -1122,8 +1364,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
SUM_JOURNAL_SIZE);
written_size += SUM_JOURNAL_SIZE;
- set_page_dirty(page);
-
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
@@ -1142,18 +1382,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
summary = (struct f2fs_summary *)(kaddr + written_size);
*summary = seg_i->sum_blk->entries[j];
written_size += SUMMARY_SIZE;
- set_page_dirty(page);
if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
SUM_FOOTER_SIZE)
continue;
+ set_page_dirty(page);
f2fs_put_page(page, 1);
page = NULL;
}
}
- if (page)
+ if (page) {
+ set_page_dirty(page);
f2fs_put_page(page, 1);
+ }
}
static void write_normal_summaries(struct f2fs_sb_info *sbi,
@@ -1239,7 +1481,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
/* get current sit block page without lock */
src_page = get_meta_page(sbi, src_off);
dst_page = grab_meta_page(sbi, dst_off);
- BUG_ON(PageDirty(src_page));
+ f2fs_bug_on(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -1271,9 +1513,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
__mark_sit_entry_dirty(sbi, segno);
}
update_sits_in_cursum(sum, -sits_in_cursum(sum));
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
@@ -1308,6 +1550,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
+ /* add discard candidates */
+ if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
+ add_discard_addrs(sbi, segno, se);
+
if (flushed)
goto to_sit_page;
@@ -1479,36 +1725,48 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- unsigned int start;
+ int sit_blk_cnt = SIT_BLK_CNT(sbi);
+ unsigned int i, start, end;
+ unsigned int readed, start_blk = 0;
+ int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- for (start = 0; start < TOTAL_SEGS(sbi); start++) {
- struct seg_entry *se = &sit_i->sentries[start];
- struct f2fs_sit_block *sit_blk;
- struct f2fs_sit_entry sit;
- struct page *page;
- int i;
-
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < sits_in_cursum(sum); i++) {
- if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
- sit = sit_in_journal(sum, i);
- mutex_unlock(&curseg->curseg_mutex);
- goto got_it;
+ do {
+ readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
+
+ start = start_blk * sit_i->sents_per_block;
+ end = (start_blk + readed) * sit_i->sents_per_block;
+
+ for (; start < end && start < TOTAL_SEGS(sbi); start++) {
+ struct seg_entry *se = &sit_i->sentries[start];
+ struct f2fs_sit_block *sit_blk;
+ struct f2fs_sit_entry sit;
+ struct page *page;
+
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < sits_in_cursum(sum); i++) {
+ if (le32_to_cpu(segno_in_journal(sum, i))
+ == start) {
+ sit = sit_in_journal(sum, i);
+ mutex_unlock(&curseg->curseg_mutex);
+ goto got_it;
+ }
}
- }
- mutex_unlock(&curseg->curseg_mutex);
- page = get_current_sit_page(sbi, start);
- sit_blk = (struct f2fs_sit_block *)page_address(page);
- sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
- f2fs_put_page(page, 1);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ page = get_current_sit_page(sbi, start);
+ sit_blk = (struct f2fs_sit_block *)page_address(page);
+ sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+ f2fs_put_page(page, 1);
got_it:
- check_block_count(sbi, start, &sit);
- seg_info_from_raw_sit(se, &sit);
- if (sbi->segs_per_sec > 1) {
- struct sec_entry *e = get_sec_entry(sbi, start);
- e->valid_blocks += se->valid_blocks;
+ check_block_count(sbi, start, &sit);
+ seg_info_from_raw_sit(se, &sit);
+ if (sbi->segs_per_sec > 1) {
+ struct sec_entry *e = get_sec_entry(sbi, start);
+ e->valid_blocks += se->valid_blocks;
+ }
}
- }
+ start_blk += readed;
+ } while (start_blk < sit_blk_cnt);
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -1628,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
/* init sm info */
sbi->sm_info = sm_info;
- INIT_LIST_HEAD(&sm_info->wblist_head);
- spin_lock_init(&sm_info->wblist_lock);
sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
@@ -1637,6 +1893,20 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ sm_info->rec_prefree_segments = sm_info->main_segments *
+ DEF_RECLAIM_PREFREE_SEGMENTS / 100;
+ sm_info->ipu_policy = F2FS_IPU_DISABLE;
+ sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
+
+ INIT_LIST_HEAD(&sm_info->discard_list);
+ sm_info->nr_discards = 0;
+ sm_info->max_discards = 0;
+
+ if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ return err;
+ }
err = build_sit_info(sbi);
if (err)
@@ -1744,6 +2014,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
+
+ if (!sm_info)
+ return;
+ destroy_flush_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -1751,3 +2025,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
sbi->sm_info = NULL;
kfree(sm_info);
}
+
+int __init create_segment_manager_caches(void)
+{
+ discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+ sizeof(struct discard_entry));
+ if (!discard_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void destroy_segment_manager_caches(void)
+{
+ kmem_cache_destroy(discard_entry_slab);
+}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index bdd10eab8c4..7091204680f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,17 +14,14 @@
#define NULL_SEGNO ((unsigned int)(~0))
#define NULL_SECNO ((unsigned int)(~0))
+#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
+
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
-#define IS_DATASEG(t) \
- ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \
- (t == CURSEG_WARM_DATA))
-
-#define IS_NODESEG(t) \
- ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
- (t == CURSEG_WARM_NODE))
+#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
+#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
#define IS_CURSEG(sbi, seg) \
((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
@@ -60,6 +57,9 @@
((blk_addr) - SM_I(sbi)->seg0_blkaddr)
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
+#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
@@ -81,22 +81,19 @@
(segno / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(sit_i, segno) \
(SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
+#define SIT_BLK_CNT(sbi) \
+ ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
#define TOTAL_SECS(sbi) (sbi->total_sections)
#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
- (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
+ (((sector_t)blk_addr) << (sbi)->log_sectors_per_block)
#define SECTOR_TO_BLOCK(sbi, sectors) \
- (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
-
-/* during checkpoint, bio_private is used to synchronize the last bio */
-struct bio_private {
- struct f2fs_sb_info *sbi;
- bool is_sync;
- void *wait;
-};
+ (sectors >> (sbi)->log_sectors_per_block)
+#define MAX_BIO_BLOCKS(max_hw_blocks) \
+ (min((int)max_hw_blocks, BIO_MAX_PAGES))
/*
* indicate a block allocation direction: RIGHT and LEFT.
@@ -383,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
static inline block_t written_block_count(struct f2fs_sb_info *sbi)
{
- struct sit_info *sit_i = SIT_I(sbi);
- block_t vblocks;
-
- mutex_lock(&sit_i->sentry_lock);
- vblocks = sit_i->written_valid_blocks;
- mutex_unlock(&sit_i->sentry_lock);
-
- return vblocks;
+ return SIT_I(sbi)->written_valid_blocks;
}
static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_segs;
-
- read_lock(&free_i->segmap_lock);
- free_segs = free_i->free_segments;
- read_unlock(&free_i->segmap_lock);
-
- return free_segs;
+ return FREE_I(sbi)->free_segments;
}
static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -412,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_secs;
-
- read_lock(&free_i->segmap_lock);
- free_secs = free_i->free_sections;
- read_unlock(&free_i->segmap_lock);
-
- return free_secs;
+ return FREE_I(sbi)->free_sections;
}
static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -454,8 +430,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
- return ((prefree_segments(sbi) / sbi->segs_per_sec)
- + free_sections(sbi) < overprovision_sections(sbi));
+ return (prefree_segments(sbi) / sbi->segs_per_sec)
+ + free_sections(sbi) < overprovision_sections(sbi);
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -463,33 +439,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
- if (sbi->por_doing)
+ if (unlikely(sbi->por_doing))
return false;
- return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
- reserved_sections(sbi)));
+ return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
+ reserved_sections(sbi));
+}
+
+static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
+{
+ return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
}
static inline int utilization(struct f2fs_sb_info *sbi)
{
- return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
+ return div_u64((u64)valid_user_blocks(sbi) * 100,
+ sbi->user_block_count);
}
/*
* Sometimes f2fs may be better to drop out-of-place update policy.
- * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write
- * data in the original place likewise other traditional file systems.
- * But, currently set 100 in percentage, which means it is disabled.
- * See below need_inplace_update().
+ * And, users can control the policy through sysfs entries.
+ * There are five policies with triggering conditions as follows.
+ * F2FS_IPU_FORCE - all the time,
+ * F2FS_IPU_SSR - if SSR mode is activated,
+ * F2FS_IPU_UTIL - if FS utilization is over threashold,
+ * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
+ * threashold,
+ * F2FS_IPUT_DISABLE - disable IPU. (=default option)
*/
-#define MIN_IPU_UTIL 100
+#define DEF_MIN_IPU_UTIL 70
+
+enum {
+ F2FS_IPU_FORCE,
+ F2FS_IPU_SSR,
+ F2FS_IPU_UTIL,
+ F2FS_IPU_SSR_UTIL,
+ F2FS_IPU_DISABLE,
+};
+
static inline bool need_inplace_update(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+ /* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode))
return false;
- if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
+
+ switch (SM_I(sbi)->ipu_policy) {
+ case F2FS_IPU_FORCE:
return true;
+ case F2FS_IPU_SSR:
+ if (need_SSR(sbi))
+ return true;
+ break;
+ case F2FS_IPU_UTIL:
+ if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+ break;
+ case F2FS_IPU_SSR_UTIL:
+ if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+ break;
+ case F2FS_IPU_DISABLE:
+ break;
+ }
return false;
}
@@ -513,16 +527,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
return curseg->next_blkoff;
}
+#ifdef CONFIG_F2FS_CHECK_FS
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
unsigned int end_segno = SM_I(sbi)->segment_count - 1;
BUG_ON(segno > end_segno);
}
-/*
- * This function is used for only debugging.
- * NOTE: In future, we have to remove this function.
- */
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
@@ -541,8 +552,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
unsigned int end_segno = sm_info->segment_count - 1;
+ bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
- int i;
+ int cur_pos = 0, next_pos;
/* check segment usage */
BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
@@ -551,11 +563,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
BUG_ON(segno > end_segno);
/* check bitmap with valid block count */
- for (i = 0; i < sbi->blocks_per_seg; i++)
- if (f2fs_test_bit(i, raw_sit->valid_map))
- valid_blocks++;
+ do {
+ if (is_valid) {
+ next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ valid_blocks += next_pos - cur_pos;
+ } else
+ next_pos = find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ cur_pos = next_pos;
+ is_valid = !is_valid;
+ } while (cur_pos < sbi->blocks_per_seg);
BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
}
+#else
+#define check_seg_range(sbi, segno)
+#define verify_block_addr(sbi, blk_addr)
+#define check_block_count(sbi, segno, raw_sit)
+#endif
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
unsigned int start)
@@ -637,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
struct request_queue *q = bdev_get_queue(bdev);
return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
}
+
+/*
+ * It is very important to gather dirty pages and write at once, so that we can
+ * submit a big bio without interfering other data writes.
+ * By default, 512 pages for directory data,
+ * 512 pages (2MB) * 3 for three types of nodes, and
+ * max_bio_blocks for meta are set.
+ */
+static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
+{
+ if (type == DATA)
+ return sbi->blocks_per_seg;
+ else if (type == NODE)
+ return 3 * sbi->blocks_per_seg;
+ else if (type == META)
+ return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+ else
+ return 0;
+}
+
+/*
+ * When writing pages, it'd better align nr_to_write for segment size.
+ */
+static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
+ struct writeback_control *wbc)
+{
+ long nr_to_write, desired;
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ return 0;
+
+ nr_to_write = wbc->nr_to_write;
+
+ if (type == DATA)
+ desired = 4096;
+ else if (type == NODE)
+ desired = 3 * max_hw_blocks(sbi);
+ else
+ desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ wbc->nr_to_write = desired;
+ return desired - nr_to_write;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13d0a0fe49d..8f96d9372ad 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -43,11 +43,15 @@ enum {
Opt_disable_roll_forward,
Opt_discard,
Opt_noheap,
+ Opt_user_xattr,
Opt_nouser_xattr,
+ Opt_acl,
Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
+ Opt_inline_data,
+ Opt_flush_merge,
Opt_err,
};
@@ -56,33 +60,59 @@ static match_table_t f2fs_tokens = {
{Opt_disable_roll_forward, "disable_roll_forward"},
{Opt_discard, "discard"},
{Opt_noheap, "no_heap"},
+ {Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
+ {Opt_inline_data, "inline_data"},
+ {Opt_flush_merge, "flush_merge"},
{Opt_err, NULL},
};
/* Sysfs support for f2fs */
+enum {
+ GC_THREAD, /* struct f2fs_gc_thread */
+ SM_INFO, /* struct f2fs_sm_info */
+ NM_INFO, /* struct f2fs_nm_info */
+ F2FS_SBI, /* struct f2fs_sb_info */
+};
+
struct f2fs_attr {
struct attribute attr;
ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
const char *, size_t);
+ int struct_type;
int offset;
};
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+ if (struct_type == GC_THREAD)
+ return (unsigned char *)sbi->gc_thread;
+ else if (struct_type == SM_INFO)
+ return (unsigned char *)SM_I(sbi);
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
+ else if (struct_type == F2FS_SBI)
+ return (unsigned char *)sbi;
+ return NULL;
+}
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+ unsigned char *ptr = NULL;
unsigned int *ui;
- if (!gc_kth)
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
return -EINVAL;
- ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+ ui = (unsigned int *)(ptr + a->offset);
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}
@@ -91,15 +121,16 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
struct f2fs_sb_info *sbi,
const char *buf, size_t count)
{
- struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+ unsigned char *ptr;
unsigned long t;
unsigned int *ui;
ssize_t ret;
- if (!gc_kth)
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
return -EINVAL;
- ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+ ui = (unsigned int *)(ptr + a->offset);
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret < 0)
@@ -135,21 +166,31 @@ static void f2fs_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
-#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
static struct f2fs_attr f2fs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
- .offset = offsetof(struct f2fs_gc_kthread, _elname), \
+ .struct_type = _struct_type, \
+ .offset = _offset \
}
-#define F2FS_RW_ATTR(name, elname) \
- F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname)
-
-F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(gc_idle, gc_idle);
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
+ F2FS_ATTR_OFFSET(struct_type, name, 0644, \
+ f2fs_sbi_show, f2fs_sbi_store, \
+ offsetof(struct struct_name, elname))
+
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -157,6 +198,13 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_max_sleep_time),
ATTR_LIST(gc_no_gc_sleep_time),
ATTR_LIST(gc_idle),
+ ATTR_LIST(reclaim_segments),
+ ATTR_LIST(max_small_discards),
+ ATTR_LIST(ipu_policy),
+ ATTR_LIST(min_ipu_util),
+ ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
NULL,
};
@@ -217,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
if (!name)
return -ENOMEM;
- if (!strncmp(name, "on", 2))
+ if (strlen(name) == 2 && !strncmp(name, "on", 2))
set_opt(sbi, BG_GC);
- else if (!strncmp(name, "off", 3))
+ else if (strlen(name) == 3 && !strncmp(name, "off", 3))
clear_opt(sbi, BG_GC);
else {
kfree(name);
@@ -237,6 +285,9 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, NOHEAP);
break;
#ifdef CONFIG_F2FS_FS_XATTR
+ case Opt_user_xattr:
+ set_opt(sbi, XATTR_USER);
+ break;
case Opt_nouser_xattr:
clear_opt(sbi, XATTR_USER);
break;
@@ -244,6 +295,10 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, INLINE_XATTR);
break;
#else
+ case Opt_user_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "user_xattr options not supported");
+ break;
case Opt_nouser_xattr:
f2fs_msg(sb, KERN_INFO,
"nouser_xattr options not supported");
@@ -254,10 +309,16 @@ static int parse_options(struct super_block *sb, char *options)
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ case Opt_acl:
+ set_opt(sbi, POSIX_ACL);
+ break;
case Opt_noacl:
clear_opt(sbi, POSIX_ACL);
break;
#else
+ case Opt_acl:
+ f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ break;
case Opt_noacl:
f2fs_msg(sb, KERN_INFO, "noacl options not supported");
break;
@@ -272,6 +333,12 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_disable_ext_identify:
set_opt(sbi, DISABLE_EXT_IDENTIFY);
break;
+ case Opt_inline_data:
+ set_opt(sbi, INLINE_DATA);
+ break;
+ case Opt_flush_merge:
+ set_opt(sbi, FLUSH_MERGE);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -286,7 +353,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
struct f2fs_inode_info *fi;
- fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
+ fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
if (!fi)
return NULL;
@@ -298,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_current_depth = 1;
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
+ init_rwsem(&fi->i_sem);
set_inode_flag(fi, FI_NEW_INODE);
if (test_opt(F2FS_SB(sb), INLINE_XATTR))
set_inode_flag(fi, FI_INLINE_XATTR);
+ /* Will be used by directory only */
+ fi->i_dir_level = F2FS_SB(sb)->dir_level;
+
return &fi->vfs_inode;
}
@@ -355,7 +426,9 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
- write_checkpoint(sbi, true);
+ /* We don't need to do checkpoint when it's clean */
+ if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
+ write_checkpoint(sbi, true);
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -441,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
- if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC))
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
seq_printf(seq, ",background_gc=%s", "on");
else
seq_printf(seq, ",background_gc=%s", "off");
@@ -467,7 +540,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
#endif
if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
seq_puts(seq, ",disable_ext_identify");
-
+ if (test_opt(sbi, INLINE_DATA))
+ seq_puts(seq, ",inline_data");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+ seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -477,16 +553,26 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
+ unsigned int total_segs =
+ le32_to_cpu(sbi->raw_super->segment_count_main);
int i;
+ seq_puts(seq, "format: segment_type|valid_blocks\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
for (i = 0; i < total_segs; i++) {
- seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1));
- if (i != 0 && (i % 10) == 0)
- seq_puts(seq, "\n");
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-5d", i);
+ seq_printf(seq, "%d|%-3u", se->type,
+ get_valid_blocks(sbi, i, 1));
+ if ((i % 10) == 9 || i == (total_segs - 1))
+ seq_putc(seq, '\n');
else
- seq_puts(seq, " ");
+ seq_putc(seq, ' ');
}
+
return 0;
}
@@ -508,6 +594,10 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
int err, active_logs;
+ bool need_restart_gc = false;
+ bool need_stop_gc = false;
+
+ sync_filesystem(sb);
/*
* Save the old mount options in case we
@@ -523,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/*
* Previous and new state of filesystem is RO,
- * so no point in checking GC conditions.
+ * so skip checking GC and FLUSH_MERGE conditions.
*/
if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
goto skip;
@@ -537,18 +627,40 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (sbi->gc_thread) {
stop_gc_thread(sbi);
f2fs_sync_fs(sb, 1);
+ need_restart_gc = true;
}
} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
err = start_gc_thread(sbi);
if (err)
goto restore_opts;
+ need_stop_gc = true;
+ }
+
+ /*
+ * We stop issue flush thread if FS is mounted as RO
+ * or if flush_merge is not passed in mount option.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ destroy_flush_cmd_control(sbi);
+ } else if (test_opt(sbi, FLUSH_MERGE) &&
+ !sbi->sm_info->cmd_control_info) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ goto restore_gc;
}
skip:
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
return 0;
-
+restore_gc:
+ if (need_restart_gc) {
+ if (start_gc_thread(sbi))
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "background gc thread is stop");
+ } else if (need_stop_gc) {
+ stop_gc_thread(sbi);
+ }
restore_opts:
sbi->mount_opt = org_mount_opt;
sbi->active_logs = active_logs;
@@ -577,7 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (ino < F2FS_ROOT_INO(sbi))
+ if (check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
@@ -588,7 +700,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
- if (generation && inode->i_generation != generation) {
+ if (unlikely(generation && inode->i_generation != generation)) {
/* we didn't find the right inode.. */
iput(inode);
return ERR_PTR(-ESTALE);
@@ -691,10 +803,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
- if (fsmeta >= total)
+ if (unlikely(fsmeta >= total))
return 1;
- if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+ if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
}
@@ -722,35 +834,56 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
+
+ sbi->dir_level = DEF_DIR_LEVEL;
}
-static int validate_superblock(struct super_block *sb,
- struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf, sector_t block)
+/*
+ * Read f2fs raw super block.
+ * Because we have two copies of super block, so read the first one at first,
+ * if the first one is invalid, move to read the second one.
+ */
+static int read_raw_super_block(struct super_block *sb,
+ struct f2fs_super_block **raw_super,
+ struct buffer_head **raw_super_buf)
{
- const char *super = (block == 0 ? "first" : "second");
+ int block = 0;
- /* read f2fs raw super block */
+retry:
*raw_super_buf = sb_bread(sb, block);
if (!*raw_super_buf) {
- f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
- super);
- return -EIO;
+ f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EIO;
+ }
}
*raw_super = (struct f2fs_super_block *)
((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (!sanity_check_raw_super(sb, *raw_super))
- return 0;
+ if (sanity_check_raw_super(sb, *raw_super)) {
+ brelse(*raw_super_buf);
+ f2fs_msg(sb, KERN_ERR,
+ "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EINVAL;
+ }
+ }
- f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
- "in %s superblock", super);
- return -EINVAL;
+ return 0;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -768,19 +901,15 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
/* set a block size */
- if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
+ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
goto free_sbi;
}
- err = validate_superblock(sb, &raw_super, &raw_super_buf, 0);
- if (err) {
- brelse(raw_super_buf);
- /* check secondary superblock when primary failed */
- err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
- if (err)
- goto free_sb_buf;
- }
+ err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+ if (err)
+ goto free_sbi;
+
sb->s_fs_info = sbi;
/* init some FS parameters */
sbi->active_logs = NR_CURSEG_TYPE;
@@ -818,12 +947,21 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- for (i = 0; i < NR_GLOBAL_LOCKS; i++)
- mutex_init(&sbi->fs_lock[i]);
mutex_init(&sbi->node_write);
- sbi->por_doing = 0;
+ sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
- init_rwsem(&sbi->bio_sem);
+
+ init_rwsem(&sbi->read_io.io_rwsem);
+ sbi->read_io.sbi = sbi;
+ sbi->read_io.bio = NULL;
+ for (i = 0; i < NR_PAGE_TYPE; i++) {
+ init_rwsem(&sbi->write_io[i].io_rwsem);
+ sbi->write_io[i].sbi = sbi;
+ sbi->write_io[i].bio = NULL;
+ }
+
+ init_rwsem(&sbi->cp_rwsem);
+ init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
/* get an inode for meta space */
@@ -886,9 +1024,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
}
/* if there are nt orphan nodes free them */
- err = -EINVAL;
- if (recover_orphan_inodes(sbi))
- goto free_node_inode;
+ recover_orphan_inodes(sbi);
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@@ -897,8 +1033,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = PTR_ERR(root);
goto free_node_inode;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ err = -EINVAL;
goto free_root_inode;
+ }
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
@@ -906,28 +1044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
}
- /* recover fsynced data */
- if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
- err = recover_fsync_data(sbi);
- if (err)
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%ld", err);
- }
-
- /*
- * If filesystem is not mounted as read-only then
- * do start the gc_thread.
- */
- if (!(sb->s_flags & MS_RDONLY)) {
- /* After POR, we can run background GC thread.*/
- err = start_gc_thread(sbi);
- if (err)
- goto fail;
- }
-
err = f2fs_build_stats(sbi);
if (err)
- goto fail;
+ goto free_root_inode;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -949,11 +1068,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
"%s", sb->s_id);
if (err)
- goto fail;
+ goto free_proc;
+ /* recover fsynced data */
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ err = recover_fsync_data(sbi);
+ if (err)
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot recover all fsync data errno=%ld", err);
+ }
+
+ /*
+ * If filesystem is not mounted as read-only then
+ * do start the gc_thread.
+ */
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /* After POR, we can run background GC thread.*/
+ err = start_gc_thread(sbi);
+ if (err)
+ goto free_kobj;
+ }
return 0;
-fail:
- stop_gc_thread(sbi);
+
+free_kobj:
+ kobject_del(&sbi->s_kobj);
+free_proc:
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ f2fs_destroy_stats(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
@@ -993,8 +1137,8 @@ MODULE_ALIAS_FS("f2fs");
static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
- sizeof(struct f2fs_inode_info), NULL);
- if (f2fs_inode_cachep == NULL)
+ sizeof(struct f2fs_inode_info));
+ if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
}
@@ -1019,9 +1163,12 @@ static int __init init_f2fs_fs(void)
err = create_node_manager_caches();
if (err)
goto free_inodecache;
- err = create_gc_caches();
+ err = create_segment_manager_caches();
if (err)
goto free_node_manager_caches;
+ err = create_gc_caches();
+ if (err)
+ goto free_segment_manager_caches;
err = create_checkpoint_caches();
if (err)
goto free_gc_caches;
@@ -1043,6 +1190,8 @@ free_checkpoint_caches:
destroy_checkpoint_caches();
free_gc_caches:
destroy_gc_caches();
+free_segment_manager_caches:
+ destroy_segment_manager_caches();
free_node_manager_caches:
destroy_node_manager_caches();
free_inodecache:
@@ -1058,6 +1207,7 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
+ destroy_segment_manager_caches();
destroy_node_manager_caches();
destroy_inodecache();
kset_unregister(f2fs_kset);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 1ac8a5f6e38..8bea941ee30 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -21,11 +21,12 @@
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
#include <linux/security.h>
+#include <linux/posix_acl_xattr.h>
#include "f2fs.h"
#include "xattr.h"
static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
int total_len, prefix_len = 0;
@@ -52,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
return -EINVAL;
}
- total_len = prefix_len + name_len + 1;
+ total_len = prefix_len + len + 1;
if (list && total_len <= list_size) {
memcpy(list, prefix, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
+ memcpy(list + prefix_len, name, len);
+ list[prefix_len + len] = '\0';
}
return total_len;
}
@@ -107,11 +108,12 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL);
+ return f2fs_setxattr(dentry->d_inode, type, name,
+ value, size, NULL, flags);
}
static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
size_t size;
@@ -163,7 +165,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
xattr->name, xattr->value,
- xattr->value_len, (struct page *)page);
+ xattr->value_len, (struct page *)page, 0);
if (err < 0)
break;
}
@@ -213,8 +215,8 @@ const struct xattr_handler f2fs_xattr_security_handler = {
static const struct xattr_handler *f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
- [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
+ [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
+ [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
@@ -226,8 +228,8 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = {
const struct xattr_handler *f2fs_xattr_handlers[] = {
&f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- &f2fs_xattr_acl_access_handler,
- &f2fs_xattr_acl_default_handler,
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
#endif
&f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
@@ -237,26 +239,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = {
NULL,
};
-static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
+static inline const struct xattr_handler *f2fs_xattr_handler(int index)
{
const struct xattr_handler *handler = NULL;
- if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
- handler = f2fs_xattr_handler_map[name_index];
+ if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
+ handler = f2fs_xattr_handler_map[index];
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index,
- size_t name_len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
- if (entry->e_name_index != name_index)
+ if (entry->e_name_index != index)
continue;
- if (entry->e_name_len != name_len)
+ if (entry->e_name_len != len)
continue;
- if (!memcmp(entry->e_name, name, name_len))
+ if (!memcmp(entry->e_name, name, len))
break;
}
return entry;
@@ -271,7 +273,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
inline_size = inline_xattr_size(inode);
- txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
+ txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return NULL;
@@ -343,6 +345,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -350,6 +353,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
+ f2fs_wait_on_page_writeback(page, NODE);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
@@ -369,7 +373,8 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
- BUG_ON(new_nid);
+ f2fs_bug_on(new_nid);
+ f2fs_wait_on_page_writeback(xpage, NODE);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -392,40 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return 0;
}
-int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
+int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
- size_t value_len, name_len;
+ size_t size, len;
if (name == NULL)
return -EINVAL;
- name_len = strlen(name);
+
+ len = strlen(name);
+ if (len > F2FS_NAME_LEN)
+ return -ERANGE;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
return -ENOMEM;
- entry = __find_xattr(base_addr, name_index, name_len, name);
+ entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
- value_len = le16_to_cpu(entry->e_value_size);
+ size = le16_to_cpu(entry->e_value_size);
- if (buffer && value_len > buffer_size) {
+ if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
}
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
- memcpy(buffer, pval, value_len);
+ memcpy(buffer, pval, size);
}
- error = value_len;
+ error = size;
cleanup:
kzfree(base_addr);
@@ -469,16 +477,15 @@ cleanup:
return error;
}
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len, struct page *ipage)
+static int __f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size,
+ struct page *ipage, int flags)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
- size_t name_len;
- int ilock;
+ size_t len;
__u32 new_hsize;
int error = -ENOMEM;
@@ -486,32 +493,35 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
return -EINVAL;
if (value == NULL)
- value_len = 0;
+ size = 0;
- name_len = strlen(name);
+ len = strlen(name);
- if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
+ if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
return -ERANGE;
- f2fs_balance_fs(sbi);
-
- ilock = mutex_lock_op(sbi);
-
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
goto exit;
/* find entry with wanted name. */
- here = __find_xattr(base_addr, name_index, name_len, name);
+ here = __find_xattr(base_addr, index, len, name);
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
- last = here;
+ if ((flags & XATTR_REPLACE) && !found) {
+ error = -ENODATA;
+ goto exit;
+ } else if ((flags & XATTR_CREATE) && found) {
+ error = -EEXIST;
+ goto exit;
+ }
+
+ last = here;
while (!IS_XATTR_LAST_ENTRY(last))
last = XATTR_NEXT_ENTRY(last);
- newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
- name_len + value_len);
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
/* 1. Check space */
if (value) {
@@ -522,9 +532,9 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
*/
free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
- free = free - ENTRY_SIZE(here);
+ free = free + ENTRY_SIZE(here);
- if (free < newsize) {
+ if (unlikely(free < newsize)) {
error = -ENOSPC;
goto exit;
}
@@ -554,12 +564,12 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
* We just write new entry.
*/
memset(last, 0, newsize);
- last->e_name_index = name_index;
- last->e_name_len = name_len;
- memcpy(last->e_name, name, name_len);
- pval = last->e_name + name_len;
- memcpy(pval, value, value_len);
- last->e_value_size = cpu_to_le16(value_len);
+ last->e_name_index = index;
+ last->e_name_len = len;
+ memcpy(last->e_name, name, len);
+ pval = last->e_name + len;
+ memcpy(pval, value, size);
+ last->e_value_size = cpu_to_le16(size);
new_hsize += newsize;
}
@@ -578,7 +588,29 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
else
update_inode_page(inode);
exit:
- mutex_unlock_op(sbi, ilock);
kzfree(base_addr);
return error;
}
+
+int f2fs_setxattr(struct inode *inode, int index, const char *name,
+ const void *value, size_t size,
+ struct page *ipage, int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ int err;
+
+ /* this case is only from init_inode_metadata */
+ if (ipage)
+ return __f2fs_setxattr(inode, index, name, value,
+ size, ipage, flags);
+ f2fs_balance_fs(sbi);
+
+ f2fs_lock_op(sbi);
+ /* protect xattr_ver */
+ down_write(&F2FS_I(inode)->i_sem);
+ err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
+ up_write(&F2FS_I(inode)->i_sem);
+ f2fs_unlock_op(sbi);
+
+ return err;
+}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 02a08fb88a1..34ab7dbcf5e 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -108,26 +108,24 @@ struct f2fs_xattr_entry {
#ifdef CONFIG_F2FS_FS_XATTR
extern const struct xattr_handler f2fs_xattr_user_handler;
extern const struct xattr_handler f2fs_xattr_trusted_handler;
-extern const struct xattr_handler f2fs_xattr_acl_access_handler;
-extern const struct xattr_handler f2fs_xattr_acl_default_handler;
extern const struct xattr_handler f2fs_xattr_advise_handler;
extern const struct xattr_handler f2fs_xattr_security_handler;
extern const struct xattr_handler *f2fs_xattr_handlers[];
extern int f2fs_setxattr(struct inode *, int, const char *,
- const void *, size_t, struct page *);
+ const void *, size_t, struct page *, int);
extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#else
#define f2fs_xattr_handlers NULL
-static inline int f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len)
+static inline int f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size, int flags)
{
return -EOPNOTSUPP;
}
-static inline int f2fs_getxattr(struct inode *inode, int name_index,
+static inline int f2fs_getxattr(struct inode *inode, int index,
const char *name, void *buffer, size_t buffer_size)
{
return -EOPNOTSUPP;