aboutsummaryrefslogtreecommitdiff
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/Kconfig20
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/acl.c182
-rw-r--r--fs/f2fs/acl.h14
-rw-r--r--fs/f2fs/checkpoint.c597
-rw-r--r--fs/f2fs/data.c880
-rw-r--r--fs/f2fs/debug.c111
-rw-r--r--fs/f2fs/dir.c388
-rw-r--r--fs/f2fs/f2fs.h603
-rw-r--r--fs/f2fs/file.c410
-rw-r--r--fs/f2fs/gc.c258
-rw-r--r--fs/f2fs/gc.h38
-rw-r--r--fs/f2fs/inline.c250
-rw-r--r--fs/f2fs/inode.c175
-rw-r--r--fs/f2fs/namei.c124
-rw-r--r--fs/f2fs/node.c1056
-rw-r--r--fs/f2fs/node.h156
-rw-r--r--fs/f2fs/recovery.c300
-rw-r--r--fs/f2fs/segment.c959
-rw-r--r--fs/f2fs/segment.h227
-rw-r--r--fs/f2fs/super.c749
-rw-r--r--fs/f2fs/xattr.c451
-rw-r--r--fs/f2fs/xattr.h45
23 files changed, 5370 insertions, 2625 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index fd27e7e6326..214fe1054fc 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -51,3 +51,23 @@ config F2FS_FS_POSIX_ACL
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
+
+config F2FS_FS_SECURITY
+ bool "F2FS Security Labels"
+ depends on F2FS_FS_XATTR
+ help
+ Security labels provide an access control facility to support Linux
+ Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+ Linux. This option enables an extended attribute handler for file
+ security labels in the f2fs filesystem, so that it requires enabling
+ the extended attribute support in advance.
+
+ If you are not using a security module, say N.
+
+config F2FS_CHECK_FS
+ bool "F2FS consistency checking feature"
+ depends on F2FS_FS
+ help
+ Enables BUG_ONs which check the file system consistency in runtime.
+
+ If you want to improve the performance, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 27a0820340b..2e35da12d29 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_F2FS_FS) += f2fs.o
-f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 137af4255da..dbe2141d10a 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -17,9 +17,6 @@
#include "xattr.h"
#include "acl.h"
-#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
- (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
-
static inline size_t f2fs_acl_size(int count)
{
if (count <= 4) {
@@ -167,25 +164,17 @@ fail:
struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
void *value = NULL;
struct posix_acl *acl;
int retval;
- if (!test_opt(sbi, POSIX_ACL))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
if (type == ACL_TYPE_ACCESS)
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
if (retval > 0) {
- value = kmalloc(retval, GFP_KERNEL);
+ value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -205,19 +194,20 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
-static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int __f2fs_set_acl(struct inode *inode, int type,
+ struct posix_acl *acl, struct page *ipage)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
int name_index;
void *value = NULL;
size_t size = 0;
int error;
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
+ if (acl) {
+ error = posix_acl_valid(acl);
+ if (error < 0)
+ return error;
+ }
switch (type) {
case ACL_TYPE_ACCESS:
@@ -250,7 +240,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
}
- error = f2fs_setxattr(inode, name_index, "", value, size);
+ error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
kfree(value);
if (!error)
@@ -260,153 +250,31 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir)
+int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- struct posix_acl *acl = NULL;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- int error = 0;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (test_opt(sbi, POSIX_ACL)) {
- acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- if (!acl)
- inode->i_mode &= ~current_umask();
- }
-
- if (test_opt(sbi, POSIX_ACL) && acl) {
-
- if (S_ISDIR(inode->i_mode)) {
- error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
- if (error)
- goto cleanup;
- }
- error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
- if (error < 0)
- return error;
- if (error > 0)
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- }
-cleanup:
- posix_acl_release(acl);
- return error;
+ return __f2fs_set_acl(inode, type, acl, NULL);
}
-int f2fs_acl_chmod(struct inode *inode)
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct posix_acl *acl;
- int error;
- mode_t mode = get_inode_mode(inode);
-
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
- if (S_ISLNK(mode))
- return -EOPNOTSUPP;
-
- acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
+ struct posix_acl *default_acl, *acl;
+ int error = 0;
- error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
+ error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (error)
return error;
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- posix_acl_release(acl);
- return error;
-}
-
-static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- const char *xname = POSIX_ACL_XATTR_DEFAULT;
- size_t size;
- if (!test_opt(sbi, POSIX_ACL))
- return 0;
-
- if (type == ACL_TYPE_ACCESS)
- xname = POSIX_ACL_XATTR_ACCESS;
-
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
-}
-
-static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
- void *buffer, size_t size, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct posix_acl *acl;
- int error;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(sbi, POSIX_ACL))
- return -EOPNOTSUPP;
-
- acl = f2fs_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (!acl)
- return -ENODATA;
- error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl = NULL;
- int error;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(sbi, POSIX_ACL))
- return -EOPNOTSUPP;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_valid(acl);
- if (error)
- goto release_and_out;
- }
- } else {
- acl = NULL;
+ if (default_acl) {
+ error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
+ ipage);
+ posix_acl_release(default_acl);
+ }
+ if (acl) {
+ if (error)
+ error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
+ ipage);
+ posix_acl_release(acl);
}
- error = f2fs_set_acl(inode, type, acl);
-
-release_and_out:
- posix_acl_release(acl);
return error;
}
-
-const struct xattr_handler f2fs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .list = f2fs_xattr_list_acl,
- .get = f2fs_xattr_get_acl,
- .set = f2fs_xattr_set_acl,
-};
-
-const struct xattr_handler f2fs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = f2fs_xattr_list_acl,
- .get = f2fs_xattr_get_acl,
- .set = f2fs_xattr_set_acl,
-};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 80f43067441..e0864651cdc 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -36,20 +36,16 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
-extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
-extern int f2fs_acl_chmod(struct inode *inode);
-extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
+extern struct posix_acl *f2fs_get_acl(struct inode *, int);
+extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL
-static inline int f2fs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-
-static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
+ struct page *page)
{
return 0;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 2b6fc131e2c..0b4710c1d37 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -20,6 +20,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include <trace/events/f2fs.h>
static struct kmem_cache *orphan_entry_slab;
static struct kmem_cache *inode_entry_slab;
@@ -29,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;
*/
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
@@ -37,9 +38,7 @@ repeat:
cond_resched();
goto repeat;
}
-
- /* We wait writeback only inside grab_meta_page() */
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, META);
SetPageUptodate(page);
return page;
}
@@ -49,7 +48,7 @@ repeat:
*/
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
repeat:
page = grab_cache_page(mapping, index);
@@ -57,64 +56,157 @@ repeat:
cond_resched();
goto repeat;
}
- if (f2fs_readpage(sbi, page, index, READ_SYNC)) {
+ if (PageUptodate(page))
+ goto out;
+
+ if (f2fs_submit_page_bio(sbi, page, index,
+ READ_SYNC | REQ_META | REQ_PRIO))
+ goto repeat;
+
+ lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
- mark_page_accessed(page);
-
- /* We do not allow returning an errorneous page */
+out:
return page;
}
+static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+{
+ switch (type) {
+ case META_NAT:
+ return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+ case META_SIT:
+ return SIT_BLK_CNT(sbi);
+ case META_SSA:
+ case META_CP:
+ return 0;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Readahead CP/NAT/SIT/SSA pages
+ */
+int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
+{
+ block_t prev_blk_addr = 0;
+ struct page *page;
+ int blkno = start;
+ int max_blks = get_max_meta_blks(sbi, type);
+
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (; nrpages-- > 0; blkno++) {
+ block_t blk_addr;
+
+ switch (type) {
+ case META_NAT:
+ /* get nat block addr */
+ if (unlikely(blkno >= max_blks))
+ blkno = 0;
+ blk_addr = current_nat_addr(sbi,
+ blkno * NAT_ENTRY_PER_BLOCK);
+ break;
+ case META_SIT:
+ /* get sit block addr */
+ if (unlikely(blkno >= max_blks))
+ goto out;
+ blk_addr = current_sit_addr(sbi,
+ blkno * SIT_ENTRY_PER_BLOCK);
+ if (blkno != start && prev_blk_addr + 1 != blk_addr)
+ goto out;
+ prev_blk_addr = blk_addr;
+ break;
+ case META_SSA:
+ case META_CP:
+ /* get ssa/cp block addr */
+ blk_addr = blkno;
+ break;
+ default:
+ BUG();
+ }
+
+ page = grab_cache_page(META_MAPPING(sbi), blk_addr);
+ if (!page)
+ continue;
+ if (PageUptodate(page)) {
+ f2fs_put_page(page, 1);
+ continue;
+ }
+
+ f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
+ f2fs_put_page(page, 0);
+ }
+out:
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return blkno - start;
+}
+
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- /* Should not write any meta pages, if any IO error was occurred */
- if (wbc->for_reclaim ||
- is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
+ trace_f2fs_writepage(page, META);
+
+ if (unlikely(sbi->por_doing))
+ goto redirty_out;
+ if (wbc->for_reclaim)
+ goto redirty_out;
- wait_on_page_writeback(page);
+ /* Should not write any meta pages, if any IO error was occurred */
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ goto no_write;
+ f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
+no_write:
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- struct block_device *bdev = sbi->sb->s_bdev;
- long written;
+ long diff, written;
- if (wbc->for_kupdate)
- return 0;
+ trace_f2fs_writepages(mapping->host, wbc, META);
- if (get_pages(sbi, F2FS_DIRTY_META) == 0)
- return 0;
+ /* collect a number of dirty meta pages and write together */
+ if (wbc->for_kupdate ||
+ get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
- written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+ diff = nr_pages_to_write(sbi, META, wbc);
+ written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
- wbc->nr_to_write -= written;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
return 0;
}
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
+ struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX;
struct pagevec pvec;
long nwritten = 0;
@@ -129,20 +221,33 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
+ if (unlikely(nr_pages == 0))
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+
lock_page(page);
- BUG_ON(page->mapping != mapping);
- BUG_ON(!PageDirty(page));
- clear_page_dirty_for_io(page);
+
+ if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
+
if (f2fs_write_meta_page(page, &wbc)) {
unlock_page(page);
break;
}
- if (nwritten++ >= nr_to_write)
+ nwritten++;
+ if (unlikely(nwritten >= nr_to_write))
break;
}
pagevec_release(&pvec);
@@ -150,7 +255,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
}
if (nwritten)
- f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+ f2fs_submit_merged_bio(sbi, type, WRITE);
return nwritten;
}
@@ -160,6 +265,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+ trace_f2fs_set_page_dirty(page, META);
+
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -175,99 +282,99 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};
-int check_orphan_space(struct f2fs_sb_info *sbi)
+int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
- unsigned int max_orphans;
int err = 0;
- /*
- * considering 512 blocks in a segment 5 blocks are needed for cp
- * and log segment summaries. Remaining blocks are used to keep
- * orphan entries with the limitation one reserved segment
- * for cp pack we can have max 1020*507 orphan entries
- */
- max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
- mutex_lock(&sbi->orphan_inode_mutex);
- if (sbi->n_orphans >= max_orphans)
+ spin_lock(&sbi->orphan_inode_lock);
+ if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
- mutex_unlock(&sbi->orphan_inode_mutex);
+ else
+ sbi->n_orphans++;
+ spin_unlock(&sbi->orphan_inode_lock);
+
return err;
}
+void release_orphan_inode(struct f2fs_sb_info *sbi)
+{
+ spin_lock(&sbi->orphan_inode_lock);
+ f2fs_bug_on(sbi->n_orphans == 0);
+ sbi->n_orphans--;
+ spin_unlock(&sbi->orphan_inode_lock);
+}
+
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head, *this;
- struct orphan_inode_entry *new = NULL, *orphan = NULL;
+ struct list_head *head;
+ struct orphan_inode_entry *new, *orphan;
+
+ new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ new->ino = ino;
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
- list_for_each(this, head) {
- orphan = list_entry(this, struct orphan_inode_entry, list);
- if (orphan->ino == ino)
- goto out;
+ list_for_each_entry(orphan, head, list) {
+ if (orphan->ino == ino) {
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, new);
+ return;
+ }
+
if (orphan->ino > ino)
break;
- orphan = NULL;
- }
-retry:
- new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- if (!new) {
- cond_resched();
- goto retry;
}
- new->ino = ino;
- /* add new_oentry into list which is sorted by inode number */
- if (orphan)
- list_add(&new->list, this->prev);
- else
- list_add_tail(&new->list, head);
-
- sbi->n_orphans++;
-out:
- mutex_unlock(&sbi->orphan_inode_mutex);
+ /* add new orphan entry into list which is sorted by inode number */
+ list_add_tail(&new->list, &orphan->list);
+ spin_unlock(&sbi->orphan_inode_lock);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *this, *next, *head;
+ struct list_head *head;
struct orphan_inode_entry *orphan;
- mutex_lock(&sbi->orphan_inode_mutex);
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
- list_for_each_safe(this, next, head) {
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
list_del(&orphan->list);
- kmem_cache_free(orphan_entry_slab, orphan);
+ f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- break;
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, orphan);
+ return;
}
}
- mutex_unlock(&sbi->orphan_inode_mutex);
+ spin_unlock(&sbi->orphan_inode_lock);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode = f2fs_iget(sbi->sb, ino);
- BUG_ON(IS_ERR(inode));
+ f2fs_bug_on(IS_ERR(inode));
clear_nlink(inode);
/* truncate all the data during iput */
iput(inode);
}
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blkaddr, i, j;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
- return 0;
+ return;
- sbi->por_doing = 1;
- start_blk = __start_cp_addr(sbi) + 1;
+ sbi->por_doing = true;
+
+ start_blk = __start_cp_addr(sbi) + 1 +
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
orphan_blkaddr = __start_sum_addr(sbi) - 1;
+ ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+
for (i = 0; i < orphan_blkaddr; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
struct f2fs_orphan_block *orphan_blk;
@@ -281,30 +388,40 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
- sbi->por_doing = 0;
- return 0;
+ sbi->por_doing = false;
+ return;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
- struct list_head *head, *this, *next;
+ struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL;
- struct page *page = NULL;
unsigned int nentries = 0;
- unsigned short index = 1;
- unsigned short orphan_blocks;
-
- orphan_blocks = (unsigned short)((sbi->n_orphans +
+ unsigned short index;
+ unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+ struct page *page = NULL;
+ struct orphan_inode_entry *orphan = NULL;
+
+ for (index = 0; index < orphan_blocks; index++)
+ grab_meta_page(sbi, start_blk + index);
- mutex_lock(&sbi->orphan_inode_mutex);
+ index = 1;
+ spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
/* loop for each orphan inode entry and write them in Jornal block */
- list_for_each_safe(this, next, head) {
- struct orphan_inode_entry *orphan;
+ list_for_each_entry(orphan, head, list) {
+ if (!page) {
+ page = find_get_page(META_MAPPING(sbi), start_blk++);
+ f2fs_bug_on(!page);
+ orphan_blk =
+ (struct f2fs_orphan_block *)page_address(page);
+ memset(orphan_blk, 0, sizeof(*orphan_blk));
+ f2fs_put_page(page, 0);
+ }
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
if (nentries == F2FS_ORPHANS_PER_BLOCK) {
/*
@@ -318,29 +435,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
set_page_dirty(page);
f2fs_put_page(page, 1);
index++;
- start_blk++;
nentries = 0;
page = NULL;
}
- if (page)
- goto page_exist;
+ }
- page = grab_meta_page(sbi, start_blk);
- orphan_blk = (struct f2fs_orphan_block *)page_address(page);
- memset(orphan_blk, 0, sizeof(*orphan_blk));
-page_exist:
- orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+ if (page) {
+ orphan_blk->blk_addr = cpu_to_le16(index);
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+ orphan_blk->entry_count = cpu_to_le32(nentries);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
}
- if (!page)
- goto end;
-
- orphan_blk->blk_addr = cpu_to_le16(index);
- orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
- orphan_blk->entry_count = cpu_to_le32(nentries);
- set_page_dirty(page);
- f2fs_put_page(page, 1);
-end:
- mutex_unlock(&sbi->orphan_inode_mutex);
+
+ spin_unlock(&sbi->orphan_inode_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -350,8 +458,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
unsigned long blk_size = sbi->blocksize;
struct f2fs_checkpoint *cp_block;
unsigned long long cur_version = 0, pre_version = 0;
- unsigned int crc = 0;
size_t crc_offset;
+ __u32 crc = 0;
/* Read the 1st cp block in this CP pack */
cp_page_1 = get_meta_page(sbi, cp_addr);
@@ -362,11 +470,11 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (crc_offset >= blk_size)
goto invalid_cp1;
- crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
+ crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp1;
- pre_version = le64_to_cpu(cp_block->checkpoint_ver);
+ pre_version = cur_cp_version(cp_block);
/* Read the 2nd cp block in this CP pack */
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
@@ -377,11 +485,11 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (crc_offset >= blk_size)
goto invalid_cp2;
- crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
+ crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp2;
- cur_version = le64_to_cpu(cp_block->checkpoint_ver);
+ cur_version = cur_cp_version(cp_block);
if (cur_version == pre_version) {
*version = cur_version;
@@ -403,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned long blk_size = sbi->blocksize;
unsigned long long cp1_version = 0, cp2_version = 0;
unsigned long long cp_start_blk_no;
+ unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ block_t cp_blk_no;
+ int i;
- sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+ sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -415,7 +526,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
/* The second checkpoint pack should start at the next segment */
- cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+ cp_start_blk_no += ((unsigned long long)1) <<
+ le32_to_cpu(fsb->log_blocks_per_seg);
cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
if (cp1 && cp2) {
@@ -434,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
+ if (cp_blks <= 1)
+ goto done;
+
+ cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+ if (cur_page == cp2)
+ cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+ for (i = 1; i < cp_blks; i++) {
+ void *sit_bitmap_ptr;
+ unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+ cur_page = get_meta_page(sbi, cp_blk_no + i);
+ sit_bitmap_ptr = page_address(cur_page);
+ memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+ f2fs_put_page(cur_page, 1);
+ }
+done:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
return 0;
@@ -443,79 +572,100 @@ fail_no_cp:
return -EINVAL;
}
+static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+ if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
+ return -EEXIST;
+
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ F2FS_I(inode)->dirty_dir = new;
+ list_add_tail(&new->list, &sbi->dir_inode_list);
+ stat_inc_dirty_dir(sbi);
+ return 0;
+}
+
void set_dirty_dir_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
struct dir_inode_entry *new;
- struct list_head *this;
+ int ret = 0;
if (!S_ISDIR(inode->i_mode))
return;
-retry:
- new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- if (!new) {
- cond_resched();
- goto retry;
- }
+
+ new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode == inode) {
- kmem_cache_free(inode_entry_slab, new);
- goto out;
- }
- }
- list_add_tail(&new->list, head);
- sbi->n_dirty_dirs++;
-
- BUG_ON(!S_ISDIR(inode->i_mode));
-out:
- inc_page_count(sbi, F2FS_DIRTY_DENTS);
+ ret = __add_dirty_inode(inode, new);
inode_inc_dirty_dents(inode);
SetPagePrivate(page);
+ spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
+}
+
+void add_dirty_dir_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct dir_inode_entry *new =
+ f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ int ret = 0;
+ new->inode = inode;
+ INIT_LIST_HEAD(&new->list);
+
+ spin_lock(&sbi->dir_inode_lock);
+ ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
}
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
+ struct dir_inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
- if (atomic_read(&F2FS_I(inode)->dirty_dents))
- goto out;
-
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
- if (entry->inode == inode) {
- list_del(&entry->list);
- kmem_cache_free(inode_entry_slab, entry);
- sbi->n_dirty_dirs--;
- break;
- }
+ if (get_dirty_dents(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
+ spin_unlock(&sbi->dir_inode_lock);
+ return;
}
-out:
+
+ entry = F2FS_I(inode)->dirty_dir;
+ list_del(&entry->list);
+ F2FS_I(inode)->dirty_dir = NULL;
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
+ kmem_cache_free(inode_entry_slab, entry);
+
+ /* Only from the recovery routine */
+ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
+ clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
+ iput(inode);
+ }
}
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
- struct list_head *head = &sbi->dir_inode_list;
+ struct list_head *head;
struct dir_inode_entry *entry;
struct inode *inode;
retry:
spin_lock(&sbi->dir_inode_lock);
+
+ head = &sbi->dir_inode_list;
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
@@ -524,14 +674,14 @@ retry:
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
if (inode) {
- filemap_flush(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
iput(inode);
} else {
/*
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
- f2fs_submit_bio(sbi, DATA, true);
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
goto retry;
}
@@ -541,54 +691,58 @@ retry:
*/
static void block_operations(struct f2fs_sb_info *sbi)
{
- int t;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ struct blk_plug plug;
- /* Stop renaming operation */
- mutex_lock_op(sbi, RENAME);
- mutex_lock_op(sbi, DENTRY_OPS);
+ blk_start_plug(&plug);
-retry_dents:
+retry_flush_dents:
+ f2fs_lock_all(sbi);
/* write all the dirty dentry pages */
- sync_dirty_dir_inodes(sbi);
-
- mutex_lock_op(sbi, DATA_WRITE);
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
- mutex_unlock_op(sbi, DATA_WRITE);
- goto retry_dents;
+ f2fs_unlock_all(sbi);
+ sync_dirty_dir_inodes(sbi);
+ goto retry_flush_dents;
}
- /* block all the operations */
- for (t = DATA_NEW; t <= NODE_TRUNC; t++)
- mutex_lock_op(sbi, t);
-
- mutex_lock(&sbi->write_inode);
-
/*
* POR: we should ensure that there is no dirty node pages
* until finishing nat/sit flush.
*/
-retry:
- sync_node_pages(sbi, 0, &wbc);
-
- mutex_lock_op(sbi, NODE_WRITE);
+retry_flush_nodes:
+ mutex_lock(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- mutex_unlock_op(sbi, NODE_WRITE);
- goto retry;
+ mutex_unlock(&sbi->node_write);
+ sync_node_pages(sbi, 0, &wbc);
+ goto retry_flush_nodes;
}
- mutex_unlock(&sbi->write_inode);
+ blk_finish_plug(&plug);
}
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- int t;
- for (t = NODE_WRITE; t >= RENAME; t--)
- mutex_unlock_op(sbi, t);
+ mutex_unlock(&sbi->node_write);
+ f2fs_unlock_all(sbi);
+}
+
+static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+{
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (!get_pages(sbi, F2FS_WRITEBACK))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&sbi->cp_wait, &wait);
}
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -598,9 +752,16 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
block_t start_blk;
struct page *cp_page;
unsigned int data_sum_blocks, orphan_blocks;
- unsigned int crc32 = 0;
+ __u32 crc32 = 0;
void *kaddr;
int i;
+ int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
+ /*
+ * This avoids to conduct wrong roll-forward operations and uses
+ * metapages, so should be called prior to sync_meta_pages below.
+ */
+ discard_next_dnode(sbi);
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -645,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
/ F2FS_ORPHANS_PER_BLOCK;
- ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+ ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+ orphan_blocks);
if (is_umount) {
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks + NR_CURSEG_NODE_TYPE);
} else {
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks);
}
if (sbi->n_orphans)
@@ -667,8 +831,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
- *(__le32 *)((unsigned char *)ckpt +
- le32_to_cpu(ckpt->checksum_offset))
+ *((__le32 *)((unsigned char *)ckpt +
+ le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
start_blk = __start_cp_addr(sbi);
@@ -680,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
+ for (i = 1; i < 1 + cp_payload_blks; i++) {
+ cp_page = grab_meta_page(sbi, start_blk++);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+ (1 << sbi->log_blocksize));
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+ }
+
if (sbi->n_orphans) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
@@ -700,11 +873,10 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
f2fs_put_page(cp_page, 1);
/* wait for previous submitted node/meta pages writeback */
- while (get_pages(sbi, F2FS_WRITEBACK))
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ wait_on_all_pages_writeback(sbi);
- filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
- filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+ filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+ filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
@@ -713,7 +885,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+ if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
clear_prefree_segments(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
@@ -727,50 +899,63 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
+
mutex_lock(&sbi->cp_mutex);
block_operations(sbi);
- f2fs_submit_bio(sbi, DATA, true);
- f2fs_submit_bio(sbi, NODE, true);
- f2fs_submit_bio(sbi, META, true);
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
+
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_bio(sbi, META, WRITE);
/*
* update checkpoint pack index
* Increase the version number so that
* SIT entries and seg summaries are written at correct place
*/
- ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
+ ckpt_ver = cur_cp_version(ckpt);
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
flush_nat_entries(sbi);
flush_sit_entries(sbi);
- reset_victim_segmap(sbi);
-
/* unlock all the fs_lock[] in do_checkpoint() */
do_checkpoint(sbi, is_umount);
unblock_operations(sbi);
mutex_unlock(&sbi->cp_mutex);
+
+ stat_inc_cp_count(sbi->stat_info);
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
void init_orphan_info(struct f2fs_sb_info *sbi)
{
- mutex_init(&sbi->orphan_inode_mutex);
+ spin_lock_init(&sbi->orphan_inode_lock);
INIT_LIST_HEAD(&sbi->orphan_inode_list);
sbi->n_orphans = 0;
+ /*
+ * considering 512 blocks in a segment 8 blocks are needed for cp
+ * and log segment summaries. Remaining blocks are used to keep
+ * orphan entries with the limitation one reserved segment
+ * for cp pack we can have max 1020*504 orphan entries
+ */
+ sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
+ * F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry), NULL);
- if (unlikely(!orphan_entry_slab))
+ sizeof(struct orphan_inode_entry));
+ if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
- sizeof(struct dir_inode_entry), NULL);
- if (unlikely(!inode_entry_slab)) {
+ sizeof(struct dir_inode_entry));
+ if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7bd22a20112..f8cf619edb5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
@@ -21,6 +22,191 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include <trace/events/f2fs.h>
+
+static void f2fs_read_end_io(struct bio *bio, int err)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ }
+ bio_put(bio);
+}
+
+static void f2fs_write_end_io(struct bio *bio, int err)
+{
+ struct f2fs_sb_info *sbi = bio->bi_private;
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ if (unlikely(err)) {
+ SetPageError(page);
+ set_bit(AS_EIO, &page->mapping->flags);
+ f2fs_stop_checkpoint(sbi);
+ }
+ end_page_writeback(page);
+ dec_page_count(sbi, F2FS_WRITEBACK);
+ }
+
+ if (sbi->wait_io) {
+ complete(sbi->wait_io);
+ sbi->wait_io = NULL;
+ }
+
+ if (!get_pages(sbi, F2FS_WRITEBACK) &&
+ !list_empty(&sbi->cp_wait.task_list))
+ wake_up(&sbi->cp_wait);
+
+ bio_put(bio);
+}
+
+/*
+ * Low-level block read/write IO operations.
+ */
+static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+ int npages, bool is_read)
+{
+ struct bio *bio;
+
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+ bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
+ bio->bi_private = sbi;
+
+ return bio;
+}
+
+static void __submit_merged_bio(struct f2fs_bio_info *io)
+{
+ struct f2fs_io_info *fio = &io->fio;
+ int rw;
+
+ if (!io->bio)
+ return;
+
+ rw = fio->rw;
+
+ if (is_read_io(rw)) {
+ trace_f2fs_submit_read_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ submit_bio(rw, io->bio);
+ } else {
+ trace_f2fs_submit_write_bio(io->sbi->sb, rw,
+ fio->type, io->bio);
+ /*
+ * META_FLUSH is only from the checkpoint procedure, and we
+ * should wait this metadata bio for FS consistency.
+ */
+ if (fio->type == META_FLUSH) {
+ DECLARE_COMPLETION_ONSTACK(wait);
+ io->sbi->wait_io = &wait;
+ submit_bio(rw, io->bio);
+ wait_for_completion(&wait);
+ } else {
+ submit_bio(rw, io->bio);
+ }
+ }
+
+ io->bio = NULL;
+}
+
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+ enum page_type type, int rw)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io;
+
+ io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+
+ down_write(&io->io_rwsem);
+
+ /* change META to META_FLUSH in the checkpoint procedure */
+ if (type >= META_FLUSH) {
+ io->fio.type = META_FLUSH;
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ }
+ __submit_merged_bio(io);
+ up_write(&io->io_rwsem);
+}
+
+/*
+ * Fill the locked page with data located in the block address.
+ * Return unlocked page.
+ */
+int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, int rw)
+{
+ struct bio *bio;
+
+ trace_f2fs_submit_page_bio(page, blk_addr, rw);
+
+ /* Allocate a new bio */
+ bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
+
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ bio_put(bio);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
+
+ submit_bio(rw, bio);
+ return 0;
+}
+
+void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
+ block_t blk_addr, struct f2fs_io_info *fio)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
+ struct f2fs_bio_info *io;
+ bool is_read = is_read_io(fio->rw);
+
+ io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+
+ verify_block_addr(sbi, blk_addr);
+
+ down_write(&io->io_rwsem);
+
+ if (!is_read)
+ inc_page_count(sbi, F2FS_WRITEBACK);
+
+ if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
+ io->fio.rw != fio->rw))
+ __submit_merged_bio(io);
+alloc_new:
+ if (io->bio == NULL) {
+ int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
+ io->fio = *fio;
+ }
+
+ if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ PAGE_CACHE_SIZE) {
+ __submit_merged_bio(io);
+ goto alloc_new;
+ }
+
+ io->last_block_in_bio = blk_addr;
+
+ up_write(&io->io_rwsem);
+ trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
+}
/*
* Lock ordering for the change of data block address:
@@ -35,9 +221,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
- wait_on_page_writeback(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE);
- rn = (struct f2fs_node *)page_address(node_page);
+ rn = F2FS_NODE(node_page);
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
@@ -49,32 +235,57 @@ int reserve_new_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
- if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
- if (!inc_valid_block_count(sbi, dn->inode, 1))
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
+ trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
+
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
+ mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
}
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
+{
+ bool need_put = dn->inode_page ? false : true;
+ int err;
+
+ /* if inode_page exists, index should be zero */
+ f2fs_bug_on(!need_put && index);
+
+ err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ if (err)
+ return err;
+
+ if (dn->data_blkaddr == NULL_ADDR)
+ err = reserve_new_block(dn);
+ if (err || need_put)
+ f2fs_put_dnode(dn);
+ return err;
+}
+
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
struct buffer_head *bh_result)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return 0;
+
read_lock(&fi->ext.ext_lock);
if (fi->ext.len == 0) {
read_unlock(&fi->ext.ext_lock);
return 0;
}
- sbi->total_hit_ext++;
+ stat_inc_total_hit(inode->i_sb);
+
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
start_blkaddr = fi->ext.blk_addr;
@@ -92,7 +303,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
else
bh_result->b_size = UINT_MAX;
- sbi->read_hit_ext++;
+ stat_inc_read_hit(inode->i_sb);
read_unlock(&fi->ext.ext_lock);
return 1;
}
@@ -105,13 +316,18 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
+ int need_update = true;
- BUG_ON(blk_addr == NEW_ADDR);
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
+ f2fs_bug_on(blk_addr == NEW_ADDR);
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
write_lock(&fi->ext.ext_lock);
start_fofs = fi->ext.fofs;
@@ -133,7 +349,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
goto end_update;
}
- /* Frone merge */
+ /* Front merge */
if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
fi->ext.fofs--;
fi->ext.blk_addr--;
@@ -158,18 +374,24 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
- goto end_update;
+ } else {
+ need_update = false;
}
- write_unlock(&fi->ext.ext_lock);
- return;
+ /* Finally, if the extent is very fragmented, let's drop the cache. */
+ if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
+ fi->ext.len = 0;
+ set_inode_flag(fi, FI_NO_EXTENT);
+ need_update = true;
+ }
end_update:
write_unlock(&fi->ext.ext_lock);
- sync_inode_page(dn);
+ if (need_update)
+ sync_inode_page(dn);
return;
}
-struct page *find_data_page(struct inode *inode, pgoff_t index)
+struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
@@ -183,7 +405,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
f2fs_put_page(page, 0);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, RDONLY_NODE);
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
return ERR_PTR(err);
f2fs_put_dnode(&dn);
@@ -192,19 +414,30 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
return ERR_PTR(-ENOENT);
/* By fallocate(), there is no cached page, but with NEW_ADDR */
- if (dn.data_blkaddr == NEW_ADDR)
+ if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
- if (err) {
- f2fs_put_page(page, 1);
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ return page;
+ }
+
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ sync ? READ_SYNC : READA);
+ if (err)
return ERR_PTR(err);
+
+ if (sync) {
+ wait_on_page_locked(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 0);
+ return ERR_PTR(-EIO);
+ }
}
- unlock_page(page);
return page;
}
@@ -221,29 +454,51 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
struct page *page;
int err;
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, RDONLY_NODE);
- if (err)
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err) {
+ f2fs_put_page(page, 1);
return ERR_PTR(err);
+ }
f2fs_put_dnode(&dn);
- if (dn.data_blkaddr == NULL_ADDR)
+ if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
+ f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
-
- page = grab_cache_page(mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ }
if (PageUptodate(page))
return page;
- BUG_ON(dn.data_blkaddr == NEW_ADDR);
- BUG_ON(dn.data_blkaddr == NULL_ADDR);
+ /*
+ * A new dentry page is allocated but not able to be written, since its
+ * new inode page couldn't be allocated due to -ENOSPC.
+ * In such the case, its blkaddr can be remained as NEW_ADDR.
+ * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
+ */
+ if (dn.data_blkaddr == NEW_ADDR) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return page;
+ }
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
- if (err) {
- f2fs_put_page(page, 1);
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
+ if (err)
return ERR_PTR(err);
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
}
return page;
}
@@ -251,9 +506,13 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
/*
* Caller ensures that this data page is never allocated.
* A new zero-filled data page is allocated in the page cache.
+ *
+ * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ * Note that, ipage is set only by make_empty_dir.
*/
-struct page *get_new_data_page(struct inode *inode, pgoff_t index,
- bool new_i_size)
+struct page *get_new_data_page(struct inode *inode,
+ struct page *ipage, pgoff_t index, bool new_i_size)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
@@ -261,196 +520,258 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index,
struct dnode_of_data dn;
int err;
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, 0);
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, index);
if (err)
return ERR_PTR(err);
-
- if (dn.data_blkaddr == NULL_ADDR) {
- if (reserve_new_block(&dn)) {
- f2fs_put_dnode(&dn);
- return ERR_PTR(-ENOSPC);
- }
- }
- f2fs_put_dnode(&dn);
-
+repeat:
page = grab_cache_page(mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ if (!page) {
+ err = -ENOMEM;
+ goto put_err;
+ }
if (PageUptodate(page))
return page;
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
} else {
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
- if (err) {
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
+ if (err)
+ goto put_err;
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ err = -EIO;
+ goto put_err;
+ }
+ if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
- return ERR_PTR(err);
+ goto repeat;
}
}
- SetPageUptodate(page);
if (new_i_size &&
i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
- mark_inode_dirty_sync(inode);
+ /* Only the directory inode sets new_i_size */
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
return page;
-}
-
-static void read_end_io(struct bio *bio, int err)
-{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- do {
- struct page *page = bvec->bv_page;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
- } while (bvec >= bio->bi_io_vec);
- kfree(bio->bi_private);
- bio_put(bio);
+put_err:
+ f2fs_put_dnode(&dn);
+ return ERR_PTR(err);
}
-/*
- * Fill the locked page with data located in the block address.
- * Read operation is synchronous, and caller must unlock the page.
- */
-int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, int type)
+static int __allocate_data_block(struct dnode_of_data *dn)
{
- struct block_device *bdev = sbi->sb->s_bdev;
- bool sync = (type == READ_SYNC);
- struct bio *bio;
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+ struct f2fs_summary sum;
+ block_t new_blkaddr;
+ struct node_info ni;
+ int type;
- /* This page can be already read by other threads */
- if (PageUptodate(page)) {
- if (!sync)
- unlock_page(page);
- return 0;
- }
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+ return -EPERM;
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+ return -ENOSPC;
- down_read(&sbi->bio_sem);
+ __set_data_blkaddr(dn, NEW_ADDR);
+ dn->data_blkaddr = NEW_ADDR;
- /* Allocate a new bio */
- bio = f2fs_bio_alloc(bdev, 1);
+ get_node_info(sbi, dn->nid, &ni);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- /* Initialize the bio */
- bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
- bio->bi_end_io = read_end_io;
+ type = CURSEG_WARM_DATA;
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
- kfree(bio->bi_private);
- bio_put(bio);
- up_read(&sbi->bio_sem);
- return -EFAULT;
- }
+ allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
- submit_bio(type, bio);
- up_read(&sbi->bio_sem);
+ /* direct IO doesn't use extent cache to maximize the performance */
+ set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+ update_extent_cache(new_blkaddr, dn);
+ clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
- /* wait for read completion if sync */
- if (sync) {
- lock_page(page);
- if (PageError(page))
- return -EIO;
- }
+ dn->data_blkaddr = new_blkaddr;
return 0;
}
/*
- * This function should be used by the data read flow only where it
- * does not check the "create" flag that indicates block allocation.
- * The reason for this special functionality is to exploit VFS readahead
- * mechanism.
+ * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * If original data blocks are allocated, then give them to blockdev.
+ * Otherwise,
+ * a. preallocate requested block addresses
+ * b. do not use extent cache for better performance
+ * c. give the block addresses to blockdev
*/
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create, bool fiemap)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
unsigned maxblocks = bh_result->b_size >> blkbits;
struct dnode_of_data dn;
- pgoff_t pgofs;
- int err;
+ int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+ pgoff_t pgofs, end_offset;
+ int err = 0, ofs = 1;
+ bool allocated = false;
/* Get the page offset from the block offset(iblock) */
pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
if (check_extent_cache(inode, pgofs, bh_result))
- return 0;
+ goto out;
+
+ if (create)
+ f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE);
- if (err)
- return (err == -ENOENT) ? 0 : err;
+ err = get_dnode_of_data(&dn, pgofs, mode);
+ if (err) {
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
+ }
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
- /* It does not support data allocation */
- BUG_ON(create);
+ if (dn.data_blkaddr != NULL_ADDR) {
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else if (create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto put_out;
+ allocated = true;
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else {
+ goto put_out;
+ }
- if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
- int i;
- unsigned int end_offset;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ bh_result->b_size = (((size_t)1) << blkbits);
+ dn.ofs_in_node++;
+ pgofs++;
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE :
- ADDRS_PER_BLOCK;
+get_next:
+ if (dn.ofs_in_node >= end_offset) {
+ if (allocated)
+ sync_inode_page(&dn);
+ allocated = false;
+ f2fs_put_dnode(&dn);
- clear_buffer_new(bh_result);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, mode);
+ if (err) {
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
+ }
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ }
+
+ if (maxblocks > (bh_result->b_size >> blkbits)) {
+ block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ if (blkaddr == NULL_ADDR && create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto sync_out;
+ allocated = true;
+ blkaddr = dn.data_blkaddr;
+ }
/* Give more consecutive addresses for the read ahead */
- for (i = 0; i < end_offset - dn.ofs_in_node; i++)
- if (((datablock_addr(dn.node_page,
- dn.ofs_in_node + i))
- != (dn.data_blkaddr + i)) || maxblocks == i)
- break;
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
- bh_result->b_size = (i << blkbits);
+ if (blkaddr == (bh_result->b_blocknr + ofs)) {
+ ofs++;
+ dn.ofs_in_node++;
+ pgofs++;
+ bh_result->b_size += (((size_t)1) << blkbits);
+ goto get_next;
+ }
}
+sync_out:
+ if (allocated)
+ sync_inode_page(&dn);
+put_out:
f2fs_put_dnode(&dn);
- return 0;
+unlock_out:
+ if (create)
+ f2fs_unlock_op(sbi);
+out:
+ trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+ return err;
+}
+
+static int get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ return generic_block_fiemap(inode, fieinfo,
+ start, len, get_data_block_fiemap);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- return mpage_readpage(page, get_data_block_ro);
+ struct inode *inode = page->mapping->host;
+ int ret;
+
+ trace_f2fs_readpage(page, DATA);
+
+ /* If the file has inline data, try to read it directlly */
+ if (f2fs_has_inline_data(inode))
+ ret = f2fs_read_inline_data(inode, page);
+ else
+ ret = mpage_readpage(page, get_data_block);
+
+ return ret;
}
static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+ struct inode *inode = file->f_mapping->host;
+
+ /* If the file has inline data, skip readpages */
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}
-int do_write_data_page(struct page *page)
+int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
struct inode *inode = page->mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- block_t old_blk_addr, new_blk_addr;
+ block_t old_blkaddr, new_blkaddr;
struct dnode_of_data dn;
int err = 0;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, page->index, RDONLY_NODE);
+ err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
return err;
- old_blk_addr = dn.data_blkaddr;
+ old_blkaddr = dn.data_blkaddr;
/* This page is already truncated */
- if (old_blk_addr == NULL_ADDR)
+ if (old_blkaddr == NULL_ADDR)
goto out_writepage;
set_page_writeback(page);
@@ -459,16 +780,13 @@ int do_write_data_page(struct page *page)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (old_blk_addr != NEW_ADDR && !is_cold_data(page) &&
- need_inplace_update(inode)) {
- rewrite_data_page(F2FS_SB(inode->i_sb), page,
- old_blk_addr);
+ if (unlikely(old_blkaddr != NEW_ADDR &&
+ !is_cold_data(page) &&
+ need_inplace_update(inode))) {
+ rewrite_data_page(page, old_blkaddr, fio);
} else {
- write_data_page(inode, page, &dn,
- old_blk_addr, &new_blk_addr);
- update_extent_cache(new_blk_addr, &dn);
- F2FS_I(inode)->data_version =
- le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
+ write_data_page(page, &dn, &new_blkaddr, fio);
+ update_extent_cache(new_blkaddr, &dn);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -483,70 +801,68 @@ static int f2fs_write_data_page(struct page *page,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long) i_size)
>> PAGE_CACHE_SHIFT;
- unsigned offset;
+ unsigned offset = 0;
+ bool need_balance_fs = false;
int err = 0;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
+
+ trace_f2fs_writepage(page, DATA);
if (page->index < end_index)
- goto out;
+ goto write;
/*
* If the offset is out-of-range of file size,
* this page does not have to be written to disk.
*/
offset = i_size & (PAGE_CACHE_SIZE - 1);
- if ((page->index >= end_index + 1) || !offset) {
- if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
- inode_dec_dirty_dents(inode);
- }
- goto unlock_out;
- }
+ if ((page->index >= end_index + 1) || !offset)
+ goto out;
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
-out:
- if (sbi->por_doing)
- goto redirty_out;
-
- if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page))
+write:
+ if (unlikely(sbi->por_doing))
goto redirty_out;
- mutex_lock_op(sbi, DATA_WRITE);
+ /* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
- inode_dec_dirty_dents(inode);
- }
- err = do_write_data_page(page);
- if (err && err != -ENOENT) {
- wbc->pages_skipped++;
- set_page_dirty(page);
+ err = do_write_data_page(page, &fio);
+ goto done;
}
- mutex_unlock_op(sbi, DATA_WRITE);
- if (wbc->for_reclaim)
- f2fs_submit_bio(sbi, DATA, true);
+ if (!wbc->for_reclaim)
+ need_balance_fs = true;
+ else if (has_not_enough_free_secs(sbi, 0))
+ goto redirty_out;
- if (err == -ENOENT)
- goto unlock_out;
+ f2fs_lock_op(sbi);
+ if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
+ err = f2fs_write_inline_data(inode, page, offset);
+ else
+ err = do_write_data_page(page, &fio);
+ f2fs_unlock_op(sbi);
+done:
+ if (err && err != -ENOENT)
+ goto redirty_out;
clear_cold_data(page);
+out:
+ inode_dec_dirty_dents(inode);
unlock_page(page);
-
- if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode))
+ if (need_balance_fs)
f2fs_balance_fs(sbi);
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
return 0;
-unlock_out:
- unlock_page(page);
- return (err == -ENOENT) ? 0 : err;
-
redirty_out:
- wbc->pages_skipped++;
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
-#define MAX_DESIRED_PAGES_WP 4096
-
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -561,26 +877,41 @@ static int f2fs_write_data_pages(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ bool locked = false;
int ret;
- long excess_nrtw = 0, desired_nrtw;
+ long diff;
- if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
- desired_nrtw = MAX_DESIRED_PAGES_WP;
- excess_nrtw = desired_nrtw - wbc->nr_to_write;
- wbc->nr_to_write = desired_nrtw;
- }
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
+
+ /* deal with chardevs and other special file */
+ if (!mapping->a_ops->writepage)
+ return 0;
- if (!S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
+ available_free_memory(sbi, DIRTY_DENTS))
+ goto skip_write;
+
+ diff = nr_pages_to_write(sbi, DATA, wbc);
+
+ if (!S_ISDIR(inode->i_mode)) {
mutex_lock(&sbi->writepages);
+ locked = true;
+ }
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
- if (!S_ISDIR(inode->i_mode))
+ if (locked)
mutex_unlock(&sbi->writepages);
- f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
+
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_dir_inode(inode);
- wbc->nr_to_write -= excess_nrtw;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
+
+skip_write:
+ wbc->pages_skipped += get_dirty_dents(inode);
+ return 0;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -594,38 +925,43 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct dnode_of_data dn;
int err = 0;
- /* for nobh_write_end */
- *fsdata = NULL;
+ trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
+repeat:
+ err = f2fs_convert_inline_data(inode, pos + len);
+ if (err)
+ return err;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
+
+ /* to avoid latency during memory pressure */
+ unlock_page(page);
+
*pagep = page;
- mutex_lock_op(sbi, DATA_NEW);
+ if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
+ goto inline_data;
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, 0);
+ err = f2fs_reserve_block(&dn, index);
+ f2fs_unlock_op(sbi);
+
if (err) {
- mutex_unlock_op(sbi, DATA_NEW);
- f2fs_put_page(page, 1);
+ f2fs_put_page(page, 0);
return err;
}
-
- if (dn.data_blkaddr == NULL_ADDR) {
- err = reserve_new_block(&dn);
- if (err) {
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, DATA_NEW);
- f2fs_put_page(page, 1);
- return err;
- }
+inline_data:
+ lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
}
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, DATA_NEW);
+ f2fs_wait_on_page_writeback(page, DATA);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
@@ -636,52 +972,113 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
/* Reading beyond i_size is simple: memset to zero */
zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
- return 0;
+ goto out;
}
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
- if (err) {
+ if (f2fs_has_inline_data(inode)) {
+ err = f2fs_read_inline_data(inode, page);
+ if (err) {
+ page_cache_release(page);
+ return err;
+ }
+ } else {
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
+ if (err)
+ return err;
+ }
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- return err;
+ return -EIO;
+ }
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
}
}
+out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
}
+static int f2fs_write_end(struct file *file,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+
+ trace_f2fs_write_end(inode, pos, len, copied);
+
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ if (pos + copied > i_size_read(inode)) {
+ i_size_write(inode, pos + copied);
+ mark_inode_dirty(inode);
+ update_inode_page(inode);
+ }
+
+ f2fs_put_page(page, 1);
+ return copied;
+}
+
+static int check_direct_IO(struct inode *inode, int rw,
+ struct iov_iter *iter, loff_t offset)
+{
+ unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+
+ if (rw == READ)
+ return 0;
+
+ if (offset & blocksize_mask)
+ return -EINVAL;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ return -EINVAL;
+
+ return 0;
+}
+
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+ struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- if (rw == WRITE)
+ /* Let buffer I/O handle the inline data case. */
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ if (check_direct_IO(inode, rw, iter, offset))
return 0;
- /* Needs synchronization with the cleaner */
- return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
- get_data_block_ro);
+ /* clear fsync mark to recover these blocks */
+ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
+
+ return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+ get_data_block);
}
-static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
+static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct inode *inode = page->mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ if (PageDirty(page))
inode_dec_dirty_dents(inode);
- }
ClearPagePrivate(page);
}
static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
ClearPagePrivate(page);
- return 0;
+ return 1;
}
static int f2fs_set_data_page_dirty(struct page *page)
@@ -689,7 +1086,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
+ trace_f2fs_set_page_dirty(page, DATA);
+
SetPageUptodate(page);
+ mark_inode_dirty(inode);
+
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
set_dirty_dir_page(inode, page);
@@ -700,7 +1101,12 @@ static int f2fs_set_data_page_dirty(struct page *page)
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
- return generic_block_bmap(mapping, block, get_data_block_ro);
+ struct inode *inode = mapping->host;
+
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ return generic_block_bmap(mapping, block, get_data_block);
}
const struct address_space_operations f2fs_dblock_aops = {
@@ -709,7 +1115,7 @@ const struct address_space_operations f2fs_dblock_aops = {
.writepage = f2fs_write_data_page,
.writepages = f2fs_write_data_pages,
.write_begin = f2fs_write_begin,
- .write_end = nobh_write_end,
+ .write_end = f2fs_write_end,
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_data_page,
.releasepage = f2fs_release_data_page,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 025b9e2f935..b52c12cf587 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -13,7 +13,6 @@
#include <linux/fs.h>
#include <linux/backing-dev.h>
-#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/blkdev.h>
#include <linux/debugfs.h>
@@ -25,12 +24,12 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static struct dentry *debugfs_root;
+static struct dentry *f2fs_debugfs_root;
static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
int i;
/* valid check of the segment numbers */
@@ -46,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
+ si->inline_inode = sbi->inline_inode;
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = sbi->node_inode->i_mapping->nrpages;
- si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
@@ -84,9 +84,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
*/
static void update_sit_info(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
- struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, vblocks;
int ndirty = 0;
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
hblks_per_sec = blks_per_sec / 2;
- mutex_lock(&sit_i->sentry_lock);
for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
dist = abs(vblocks - hblks_per_sec);
@@ -105,8 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
ndirty++;
}
}
- mutex_unlock(&sit_i->sentry_lock);
- dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100;
+ dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
si->bimodal = bimodal / dist;
if (si->dirty_count)
si->avg_vblocks = total_vblocks / ndirty;
@@ -119,7 +116,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
*/
static void update_mem_info(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
if (si->base_mem)
@@ -138,14 +135,13 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
if (sbi->segs_per_sec > 1)
- si->base_mem += sbi->total_sections *
- sizeof(struct sec_entry);
+ si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
/* build free segmap */
si->base_mem += sizeof(struct free_segmap_info);
si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
- si->base_mem += f2fs_bitmap_size(sbi->total_sections);
+ si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
/* build curseg */
si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
@@ -154,7 +150,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build dirty segmap */
si->base_mem += sizeof(struct dirty_seglist_info);
si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
- si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi));
+ si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
/* buld nm */
si->base_mem += sizeof(struct f2fs_nm_info);
@@ -167,9 +163,9 @@ get_cache:
/* free nids */
si->cache_mem = NM_I(sbi)->fcnt;
si->cache_mem += NM_I(sbi)->nat_cnt;
- npages = sbi->node_inode->i_mapping->nrpages;
+ npages = NODE_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- npages = sbi->meta_inode->i_mapping->nrpages;
+ npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
@@ -177,12 +173,12 @@ get_cache:
static int stat_show(struct seq_file *s, void *v)
{
- struct f2fs_stat_info *si, *next;
+ struct f2fs_stat_info *si;
int i = 0;
int j;
mutex_lock(&f2fs_stat_mutex);
- list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
+ list_for_each_entry(si, &f2fs_stat_list, stat_list) {
char devname[BDEVNAME_SIZE];
update_general_status(si->sbi);
@@ -202,6 +198,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "Other: %u)\n - Data: %u\n",
si->valid_node_count - si->valid_inode_count,
si->valid_count - si->valid_node_count);
+ seq_printf(s, " - Inline_data Inode: %u\n",
+ si->inline_inode);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -235,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
+ seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -244,32 +243,32 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - node blocks : %d\n", si->node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
- seq_printf(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - nodes %4d in %4d\n",
+ seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
- seq_printf(s, " - dents %4d in dirs:%4d\n",
+ seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
- seq_printf(s, " - meta %4d in %4d\n",
+ seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
- seq_printf(s, " - NATs %5d > %lu\n",
- si->nats, NM_WOUT_THRESHOLD);
- seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
- si->sits, si->fnids);
- seq_printf(s, "\nDistribution of User Blocks:");
- seq_printf(s, " [ valid | invalid | free ]\n");
- seq_printf(s, " [");
+ seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
+ si->nats, si->sits);
+ seq_printf(s, " - free_nids: %9d\n",
+ si->fnids);
+ seq_puts(s, "\nDistribution of User Blocks:");
+ seq_puts(s, " [ valid | invalid | free ]\n");
+ seq_puts(s, " [");
for (j = 0; j < si->util_valid; j++)
- seq_printf(s, "-");
- seq_printf(s, "|");
+ seq_putc(s, '-');
+ seq_putc(s, '|');
for (j = 0; j < si->util_invalid; j++)
- seq_printf(s, "-");
- seq_printf(s, "|");
+ seq_putc(s, '-');
+ seq_putc(s, '|');
for (j = 0; j < si->util_free; j++)
- seq_printf(s, "-");
- seq_printf(s, "]\n\n");
+ seq_putc(s, '-');
+ seq_puts(s, "]\n\n");
seq_printf(s, "SSR: %u blocks in %u segments\n",
si->block_count[SSR], si->segment_count[SSR]);
seq_printf(s, "LFS: %u blocks in %u segments\n",
@@ -307,11 +306,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
- sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
- if (!sbi->stat_info)
+ si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
+ if (!si)
return -ENOMEM;
- si = sbi->stat_info;
si->all_area_segs = le32_to_cpu(raw_super->segment_count);
si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -321,6 +319,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->main_area_zones = si->main_area_sections /
le32_to_cpu(raw_super->secs_per_zone);
si->sbi = sbi;
+ sbi->stat_info = si;
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
@@ -331,25 +330,43 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
- struct f2fs_stat_info *si = sbi->stat_info;
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
mutex_lock(&f2fs_stat_mutex);
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(sbi->stat_info);
+ kfree(si);
}
void __init f2fs_create_root_stats(void)
{
- debugfs_root = debugfs_create_dir("f2fs", NULL);
- if (debugfs_root)
- debugfs_create_file("status", S_IRUGO, debugfs_root,
- NULL, &stat_fops);
+ struct dentry *file;
+
+ f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
+ if (!f2fs_debugfs_root)
+ goto bail;
+
+ file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
+ NULL, &stat_fops);
+ if (!file)
+ goto free_debugfs_dir;
+
+ return;
+
+free_debugfs_dir:
+ debugfs_remove(f2fs_debugfs_root);
+
+bail:
+ f2fs_debugfs_root = NULL;
+ return;
}
void f2fs_destroy_root_stats(void)
{
- debugfs_remove_recursive(debugfs_root);
- debugfs_root = NULL;
+ if (!f2fs_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a1f38443ece..a4addd72ebb 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -13,6 +13,7 @@
#include "f2fs.h"
#include "node.h"
#include "acl.h"
+#include "xattr.h"
static unsigned long dir_blocks(struct inode *inode)
{
@@ -20,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
>> PAGE_CACHE_SHIFT;
}
-static unsigned int dir_buckets(unsigned int level)
+static unsigned int dir_buckets(unsigned int level, int dir_level)
{
- if (level < MAX_DIR_HASH_DEPTH / 2)
- return 1 << level;
+ if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
+ return 1 << (level + dir_level);
else
- return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+ return MAX_DIR_BUCKETS;
}
static unsigned int bucket_blocks(unsigned int level)
@@ -60,17 +61,18 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
-static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+static unsigned long dir_block_index(unsigned int level,
+ int dir_level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;
for (i = 0; i < level; i++)
- bidx += dir_buckets(i) * bucket_blocks(i);
+ bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
@@ -92,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
- unsigned long bit_pos, end_pos, next_pos;
+ unsigned long bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- int slots;
+ const void *dentry_bits = &dentry_blk->dentry_bitmap;
+ int max_len = 0;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
+ if (!test_bit_le(bit_pos, dentry_bits)) {
+ if (bit_pos == 0)
+ max_len = 1;
+ else if (!test_bit_le(bit_pos - 1, dentry_bits))
+ max_len++;
+ bit_pos++;
+ continue;
+ }
de = &dentry_blk->dentry[bit_pos];
- slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
@@ -109,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
goto found;
}
}
- next_pos = bit_pos + slots;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, next_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- end_pos = NR_DENTRY_IN_BLOCK;
- else
- end_pos = bit_pos;
- if (*max_slots < end_pos - next_pos)
- *max_slots = end_pos - next_pos;
+ if (max_len > *max_slots) {
+ *max_slots = max_len;
+ max_len = 0;
+ }
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
kunmap(dentry_page);
found:
+ if (max_len > *max_slots)
+ *max_slots = max_len;
return de;
}
@@ -138,17 +143,18 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bool room = false;
int max_slots = 0;
- BUG_ON(level > MAX_DIR_HASH_DEPTH);
+ f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
/* no need to allocate new dentry pages to all the indices */
- dentry_page = find_data_page(dir, bidx);
+ dentry_page = find_data_page(dir, bidx, true);
if (IS_ERR(dentry_page)) {
room = true;
continue;
@@ -212,9 +218,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
{
- struct page *page = NULL;
- struct f2fs_dir_entry *de = NULL;
- struct f2fs_dentry_block *dentry_blk = NULL;
+ struct page *page;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_block *dentry_blk;
page = get_lock_data_page(dir, 0);
if (IS_ERR(page))
@@ -246,11 +252,8 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-
- mutex_lock_op(sbi, DENTRY_OPS);
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
kunmap(page);
@@ -258,90 +261,148 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
- /* update parent inode number before releasing dentry page */
- F2FS_I(inode)->i_pino = dir->i_ino;
-
f2fs_put_page(page, 1);
- mutex_unlock_op(sbi, DENTRY_OPS);
}
-void init_dent_inode(const struct qstr *name, struct page *ipage)
+static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
- struct f2fs_node *rn;
-
- if (IS_ERR(ipage))
- return;
+ struct f2fs_inode *ri;
- wait_on_page_writeback(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE);
/* copy name info. to this inode page */
- rn = (struct f2fs_node *)page_address(ipage);
- rn->i.i_namelen = cpu_to_le32(name->len);
- memcpy(rn->i.i_name, name->name, name->len);
+ ri = F2FS_INODE(ipage);
+ ri->i_namelen = cpu_to_le32(name->len);
+ memcpy(ri->i_name, name->name, name->len);
set_page_dirty(ipage);
}
-static int init_inode_metadata(struct inode *inode,
+int update_dent_inode(struct inode *inode, const struct qstr *name)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *page;
+
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ init_dent_inode(name, page);
+ f2fs_put_page(page, 1);
+
+ return 0;
+}
+
+static int make_empty_dir(struct inode *inode,
+ struct inode *parent, struct page *page)
+{
+ struct page *dentry_page;
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dir_entry *de;
+ void *kaddr;
+
+ dentry_page = get_new_data_page(inode, page, 0, true);
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
+
+ kaddr = kmap_atomic(dentry_page);
+ dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ de = &dentry_blk->dentry[0];
+ de->name_len = cpu_to_le16(1);
+ de->hash_code = 0;
+ de->ino = cpu_to_le32(inode->i_ino);
+ memcpy(dentry_blk->filename[0], ".", 1);
+ set_de_type(de, inode);
+
+ de = &dentry_blk->dentry[1];
+ de->hash_code = 0;
+ de->name_len = cpu_to_le16(2);
+ de->ino = cpu_to_le32(parent->i_ino);
+ memcpy(dentry_blk->filename[1], "..", 2);
+ set_de_type(de, inode);
+
+ test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
+ test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
+ kunmap_atomic(kaddr);
+
+ set_page_dirty(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ return 0;
+}
+
+static struct page *init_inode_metadata(struct inode *inode,
struct inode *dir, const struct qstr *name)
{
+ struct page *page;
+ int err;
+
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
- int err;
- err = new_inode_page(inode, name);
- if (err)
- return err;
+ page = new_inode_page(inode, name);
+ if (IS_ERR(page))
+ return page;
if (S_ISDIR(inode->i_mode)) {
- err = f2fs_make_empty(inode, dir);
- if (err) {
- remove_inode_page(inode);
- return err;
- }
+ err = make_empty_dir(inode, dir, page);
+ if (err)
+ goto error;
}
- err = f2fs_init_acl(inode, dir);
- if (err) {
- remove_inode_page(inode);
- return err;
- }
+ err = f2fs_init_acl(inode, dir, page);
+ if (err)
+ goto put_error;
+
+ err = f2fs_init_security(inode, dir, name, page);
+ if (err)
+ goto put_error;
} else {
- struct page *ipage;
- ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
- if (IS_ERR(ipage))
- return PTR_ERR(ipage);
- set_cold_node(inode, ipage);
- init_dent_inode(name, ipage);
- f2fs_put_page(ipage, 1);
+ page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
+ if (IS_ERR(page))
+ return page;
+
+ set_cold_node(inode, page);
}
+
+ init_dent_inode(name, page);
+
+ /*
+ * This file should be checkpointed during fsync.
+ * We lost i_pino from now on.
+ */
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
+ file_lost_pino(inode);
inc_nlink(inode);
- f2fs_write_inode(inode, NULL);
}
- return 0;
+ return page;
+
+put_error:
+ f2fs_put_page(page, 1);
+error:
+ /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
+ truncate_inode_pages(&inode->i_data, 0);
+ truncate_blocks(inode, 0);
+ remove_dirty_dir_inode(inode);
+ remove_inode_page(inode);
+ return ERR_PTR(err);
}
static void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
- bool need_dir_update = false;
-
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
if (S_ISDIR(inode->i_mode)) {
inc_nlink(dir);
- need_dir_update = true;
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(dir);
+
if (F2FS_I(dir)->i_current_depth != current_depth) {
F2FS_I(dir)->i_current_depth = current_depth;
- need_dir_update = true;
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
- if (need_dir_update)
- f2fs_write_inode(dir, NULL);
- else
- mark_inode_dirty(dir);
-
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
@@ -370,7 +431,12 @@ next:
goto next;
}
-int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
+/*
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ */
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
{
unsigned int bit_pos;
unsigned int level;
@@ -379,11 +445,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
f2fs_hash_t dentry_hash;
struct f2fs_dir_entry *de;
unsigned int nbucket, nblock;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
size_t namelen = name->len;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
int slots = GET_DENTRY_SLOTS(namelen);
+ struct page *page;
int err = 0;
int i;
@@ -396,25 +462,23 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
}
start:
- if (current_depth == MAX_DIR_HASH_DEPTH)
+ if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
/* Increase the depth, if required */
if (level == current_depth)
++current_depth;
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ (le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
- mutex_lock_op(sbi, DENTRY_OPS);
- dentry_page = get_new_data_page(dir, block, true);
- if (IS_ERR(dentry_page)) {
- mutex_unlock_op(sbi, DENTRY_OPS);
+ dentry_page = get_new_data_page(dir, NULL, block, true);
+ if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- }
dentry_blk = kmap(dentry_page);
bit_pos = room_for_filename(dentry_blk, slots);
@@ -423,19 +487,20 @@ start:
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
- mutex_unlock_op(sbi, DENTRY_OPS);
}
/* Move to next level to find the empty slot for new dentry */
++level;
goto start;
add_dentry:
- err = init_inode_metadata(inode, dir, name);
- if (err)
- goto fail;
-
- wait_on_page_writeback(dentry_page);
+ f2fs_wait_on_page_writeback(dentry_page, DATA);
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
de = &dentry_blk->dentry[bit_pos];
de->hash_code = dentry_hash;
de->name_len = cpu_to_le16(namelen);
@@ -446,14 +511,21 @@ add_dentry:
test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
set_page_dirty(dentry_page);
- update_parent_metadata(dir, inode, current_depth);
-
- /* update parent inode number before releasing dentry page */
+ /* we don't need to mark_inode_dirty now */
F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ update_parent_metadata(dir, inode, current_depth);
fail:
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode_page(dir);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
- mutex_unlock_op(sbi, DENTRY_OPS);
return err;
}
@@ -468,15 +540,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
unsigned int bit_pos;
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
void *kaddr = page_address(page);
int i;
- mutex_lock_op(sbi, DENTRY_OPS);
-
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -492,72 +561,37 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- if (inode && S_ISDIR(inode->i_mode)) {
- drop_nlink(dir);
- f2fs_write_inode(dir, NULL);
- } else {
- mark_inode_dirty(dir);
- }
-
if (inode) {
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(dir);
+ update_inode_page(dir);
+ }
inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(inode);
i_size_write(inode, 0);
}
- f2fs_write_inode(inode, NULL);
+ up_write(&F2FS_I(inode)->i_sem);
+ update_inode_page(inode);
+
if (inode->i_nlink == 0)
add_orphan_inode(sbi, inode->i_ino);
+ else
+ release_orphan_inode(sbi);
}
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(dir);
}
f2fs_put_page(page, 1);
-
- mutex_unlock_op(sbi, DENTRY_OPS);
-}
-
-int f2fs_make_empty(struct inode *inode, struct inode *parent)
-{
- struct page *dentry_page;
- struct f2fs_dentry_block *dentry_blk;
- struct f2fs_dir_entry *de;
- void *kaddr;
-
- dentry_page = get_new_data_page(inode, 0, true);
- if (IS_ERR(dentry_page))
- return PTR_ERR(dentry_page);
-
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
-
- de = &dentry_blk->dentry[0];
- de->name_len = cpu_to_le16(1);
- de->hash_code = f2fs_dentry_hash(".", 1);
- de->ino = cpu_to_le32(inode->i_ino);
- memcpy(dentry_blk->filename[0], ".", 1);
- set_de_type(de, inode);
-
- de = &dentry_blk->dentry[1];
- de->hash_code = f2fs_dentry_hash("..", 2);
- de->name_len = cpu_to_le16(2);
- de->ino = cpu_to_le32(parent->i_ino);
- memcpy(dentry_blk->filename[1], "..", 2);
- set_de_type(de, inode);
-
- test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
- test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
- kunmap_atomic(kaddr);
-
- set_page_dirty(dentry_page);
- f2fs_put_page(dentry_page, 1);
- return 0;
}
bool f2fs_empty_dir(struct inode *dir)
@@ -597,34 +631,32 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
-static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
+static int f2fs_readdir(struct file *file, struct dir_context *ctx)
{
- unsigned long pos = file->f_pos;
struct inode *inode = file_inode(file);
unsigned long npages = dir_blocks(inode);
- unsigned char *types = NULL;
- unsigned int bit_pos = 0, start_bit_pos = 0;
- int over = 0;
+ unsigned int bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
- unsigned int n = 0;
+ struct file_ra_state *ra = &file->f_ra;
+ unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
unsigned char d_type = DT_UNKNOWN;
- int slots;
- types = f2fs_filetype_table;
- bit_pos = (pos % NR_DENTRY_IN_BLOCK);
- n = (pos / NR_DENTRY_IN_BLOCK);
+ bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
+
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- for ( ; n < npages; n++) {
+ for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
if (IS_ERR(dentry_page))
continue;
- start_bit_pos = bit_pos;
dentry_blk = kmap(dentry_page);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
- d_type = DT_UNKNOWN;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
@@ -632,28 +664,26 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
break;
de = &dentry_blk->dentry[bit_pos];
- if (types && de->file_type < F2FS_FT_MAX)
- d_type = types[de->file_type];
-
- over = filldir(dirent,
+ if (de->file_type < F2FS_FT_MAX)
+ d_type = f2fs_filetype_table[de->file_type];
+ else
+ d_type = DT_UNKNOWN;
+ if (!dir_emit(ctx,
dentry_blk->filename[bit_pos],
le16_to_cpu(de->name_len),
- (n * NR_DENTRY_IN_BLOCK) + bit_pos,
- le32_to_cpu(de->ino), d_type);
- if (over) {
- file->f_pos += bit_pos - start_bit_pos;
- goto success;
- }
- slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
- bit_pos += slots;
+ le32_to_cpu(de->ino), d_type))
+ goto stop;
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos;
}
bit_pos = 0;
- file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
+ ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
dentry_page = NULL;
}
-success:
+stop:
if (dentry_page && !IS_ERR(dentry_page)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
@@ -665,7 +695,7 @@ success:
const struct file_operations f2fs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .readdir = f2fs_readdir,
+ .iterate = f2fs_readdir,
.fsync = f2fs_sync_file,
.unlocked_ioctl = f2fs_ioctl,
};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cc2213afdcc..58df97e174d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -17,6 +17,16 @@
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/magic.h>
+#include <linux/kobject.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_F2FS_CHECK_FS
+#define f2fs_bug_on(condition) BUG_ON(condition)
+#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
+#else
+#define f2fs_bug_on(condition)
+#define f2fs_down_write(x, y) down_write(x)
+#endif
/*
* For mount options
@@ -28,6 +38,9 @@
#define F2FS_MOUNT_XATTR_USER 0x00000010
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
+#define F2FS_MOUNT_INLINE_XATTR 0x00000080
+#define F2FS_MOUNT_INLINE_DATA 0x00000100
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -37,21 +50,35 @@
typecheck(unsigned long long, b) && \
((long long)((a) - (b)) > 0))
-typedef u64 block_t;
+typedef u32 block_t; /*
+ * should not change u32, since it is the on-disk block
+ * address format, __le32.
+ */
typedef u32 nid_t;
struct f2fs_mount_info {
unsigned int opt;
};
-static inline __u32 f2fs_crc32(void *buff, size_t len)
+#define CRCPOLY_LE 0xedb88320
+
+static inline __u32 f2fs_crc32(void *buf, size_t len)
{
- return crc32_le(F2FS_SUPER_MAGIC, buff, len);
+ unsigned char *p = (unsigned char *)buf;
+ __u32 crc = F2FS_SUPER_MAGIC;
+ int i;
+
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+ }
+ return crc;
}
-static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size)
+static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size)
{
- return f2fs_crc32(buff, buff_size) == blk_crc;
+ return f2fs_crc32(buf, buf_size) == blk_crc;
}
/*
@@ -62,6 +89,16 @@ enum {
SIT_BITMAP
};
+/*
+ * For CP/NAT/SIT/SSA readahead
+ */
+enum {
+ META_CP,
+ META_NAT,
+ META_SIT,
+ META_SSA
+};
+
/* for the list of orphan inodes */
struct orphan_inode_entry {
struct list_head list; /* list head */
@@ -74,6 +111,13 @@ struct dir_inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+/* for the list of blockaddresses to be discarded */
+struct discard_entry {
+ struct list_head list; /* list head */
+ block_t blkaddr; /* block address to be discarded */
+ int len; /* # of consecutive blocks of the discard */
+};
+
/* for the list of fsync inodes, used only during recovery */
struct fsync_inode_entry {
struct list_head list; /* list head */
@@ -120,47 +164,63 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
/*
* For INODE and NODE manager
*/
-#define XATTR_NODE_OFFSET (-1) /*
- * store xattrs to one node block per
- * file keeping -1 as its node offset to
- * distinguish from index node blocks.
- */
-#define RDONLY_NODE 1 /*
- * specify a read-only mode when getting
- * a node block. 0 is read-write mode.
- * used by get_dnode_of_data().
+/*
+ * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
+ * as its node offset to distinguish from index node blocks.
+ * But some bits are used to mark the node block.
+ */
+#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \
+ >> OFFSET_BIT_SHIFT)
+enum {
+ ALLOC_NODE, /* allocate a new node page if needed */
+ LOOKUP_NODE, /* look up a node without readahead */
+ LOOKUP_NODE_RA, /*
+ * look up a node with readahead called
+ * by get_data_block.
*/
+};
+
#define F2FS_LINK_MAX 32000 /* maximum link count per file */
+#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
+
/* for in-memory extent cache entry */
+#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
+
struct extent_info {
rwlock_t ext_lock; /* rwlock for consistency */
unsigned int fofs; /* start offset in a file */
u32 blk_addr; /* start block address of the extent */
- unsigned int len; /* lenth of the extent */
+ unsigned int len; /* length of the extent */
};
/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/
#define FADVISE_COLD_BIT 0x01
+#define FADVISE_LOST_PINO_BIT 0x02
+
+#define DEF_DIR_LEVEL 0
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
+ unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* use only in directory structure */
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
- unsigned long long data_version;/* latest version of data for fsync */
+ struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_dents; /* # of dirty dentry pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
+ unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
+ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -186,8 +246,9 @@ static inline void set_raw_extent(struct extent_info *ext,
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
- nid_t init_scan_nid; /* the first nid to be scanned */
+ nid_t available_nids; /* maximum available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
+ unsigned int ram_thresh; /* control the memory footprint */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -197,6 +258,7 @@ struct f2fs_nm_info {
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
/* free node ids management */
+ struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
@@ -259,15 +321,27 @@ enum {
NO_CHECK_TYPE
};
+struct flush_cmd {
+ struct flush_cmd *next;
+ struct completion wait;
+ int ret;
+};
+
+struct flush_cmd_control {
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct flush_cmd *issue_list; /* list for command issue */
+ struct flush_cmd *dispatch_list; /* list for command dispatch */
+ spinlock_t issue_lock; /* for issue list lock */
+ struct flush_cmd *issue_tail; /* list tail of issue list */
+};
+
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct list_head wblist_head; /* list of under-writeback pages */
- spinlock_t wblist_lock; /* lock for checkpoint */
-
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -276,16 +350,22 @@ struct f2fs_sm_info {
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
unsigned int ovp_segments; /* # of overprovision segments */
-};
-/*
- * For directory operation
- */
-#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1)
-#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2)
-#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3)
-#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4)
-#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5)
+ /* a threshold to reclaim prefree segments */
+ unsigned int rec_prefree_segments;
+
+ /* for small discard management */
+ struct list_head discard_list; /* 4KB discard list */
+ int nr_discards; /* # of discards in the list */
+ int max_discards; /* max. discards to be issued */
+
+ unsigned int ipu_policy; /* in-place-update policy */
+ unsigned int min_ipu_util; /* in-place-update threshold */
+
+ /* for flush command control */
+ struct flush_cmd_control *cmd_control_info;
+
+};
/*
* For superblock
@@ -305,25 +385,6 @@ enum count_type {
};
/*
- * FS_LOCK nesting subclasses for the lock validator:
- *
- * The locking order between these classes is
- * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW
- * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC
- */
-enum lock_type {
- RENAME, /* for renaming operations */
- DENTRY_OPS, /* for directory operations */
- DATA_WRITE, /* for data write */
- DATA_NEW, /* for data allocation */
- DATA_TRUNC, /* for data truncate */
- NODE_NEW, /* for node allocation */
- NODE_TRUNC, /* for node truncate */
- NODE_WRITE, /* for node write */
- NR_LOCK_TYPE,
-};
-
-/*
* The below are the page types of bios used in submti_bio().
* The available types are:
* DATA User data pages. It operates as async mode.
@@ -334,6 +395,7 @@ enum lock_type {
* with waiting the bio's completion
* ... Only can be used with META.
*/
+#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
enum page_type {
DATA,
NODE,
@@ -342,8 +404,23 @@ enum page_type {
META_FLUSH,
};
+struct f2fs_io_info {
+ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
+ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
+};
+
+#define is_read_io(rw) (((rw) & 1) == READ)
+struct f2fs_bio_info {
+ struct f2fs_sb_info *sbi; /* f2fs superblock */
+ struct bio *bio; /* bios to merge */
+ sector_t last_block_in_bio; /* last block number */
+ struct f2fs_io_info fio; /* store buffered io info. */
+ struct rw_semaphore io_rwsem; /* blocking op for bio */
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
+ struct proc_dir_entry *s_proc; /* proc entry */
struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
int s_dirty; /* dirty flag for checkpoint */
@@ -354,28 +431,31 @@ struct f2fs_sb_info {
/* for segment-related operations */
struct f2fs_sm_info *sm_info; /* segment manager */
- struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */
- sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */
- struct rw_semaphore bio_sem; /* IO semaphore */
+
+ /* for bio operations */
+ struct f2fs_bio_info read_io; /* for read bios */
+ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
+ struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
struct inode *meta_inode; /* cache meta blocks */
- struct mutex cp_mutex; /* for checkpoint procedure */
- struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */
- struct mutex write_inode; /* mutex for write inode */
+ struct mutex cp_mutex; /* checkpoint procedure lock */
+ struct rw_semaphore cp_rwsem; /* blocking FS operations */
+ struct mutex node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
- int por_doing; /* recovery is doing or not */
+ bool por_doing; /* recovery is doing or not */
+ wait_queue_head_t cp_wait;
/* for orphan inode management */
struct list_head orphan_inode_list; /* orphan inode list */
- struct mutex orphan_inode_mutex; /* for orphan inode list */
+ spinlock_t orphan_inode_lock; /* for orphan inode list */
unsigned int n_orphans; /* # of orphan inodes */
+ unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
struct list_head dir_inode_list; /* dir inode list */
spinlock_t dir_inode_lock; /* for dir inode list lock */
- unsigned int n_dirty_dirs; /* # of dir inodes */
/* basic file system units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
@@ -393,6 +473,7 @@ struct f2fs_sb_info {
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
int active_logs; /* # of active logs */
+ int dir_level; /* directory level */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
@@ -406,18 +487,30 @@ struct f2fs_sb_info {
/* for cleaning operations */
struct mutex gc_mutex; /* mutex for GC */
struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ unsigned int cur_victim_sec; /* current victim section num */
+
+ /* maximum # of trials to find a victim segment for SSR and GC */
+ unsigned int max_victim_search;
/*
* for stat information.
* one is for the LFS mode, and the other is for the SSR mode.
*/
+#ifdef CONFIG_F2FS_STAT_FS
struct f2fs_stat_info *stat_info; /* FS status information */
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
- unsigned int last_victim[2]; /* last victim segment # */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
+ int inline_inode; /* # of inline_data inodes */
int bg_gc; /* background gc calls */
+ unsigned int n_dirty_dirs; /* # of dir inodes */
+#endif
+ unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
+
+ /* For sysfs suppport */
+ struct kobject s_kobj;
+ struct completion s_kobj_unregister;
};
/*
@@ -443,6 +536,16 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
return (struct f2fs_checkpoint *)(sbi->ckpt);
}
+static inline struct f2fs_node *F2FS_NODE(struct page *page)
+{
+ return (struct f2fs_node *)page_address(page);
+}
+
+static inline struct f2fs_inode *F2FS_INODE(struct page *page)
+{
+ return &((struct f2fs_node *)page_address(page))->i;
+}
+
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
{
return (struct f2fs_nm_info *)(sbi->nm_info);
@@ -468,6 +571,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
}
+static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->meta_inode->i_mapping;
+}
+
+static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->node_inode->i_mapping;
+}
+
static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
{
sbi->s_dirty = 1;
@@ -478,6 +591,11 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
sbi->s_dirty = 0;
}
+static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
+{
+ return le64_to_cpu(cp->checkpoint_ver);
+}
+
static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
{
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
@@ -498,22 +616,36 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
cp->ckpt_flags = cpu_to_le32(ckpt_flags);
}
-static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
+static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- mutex_lock_nested(&sbi->fs_lock[t], t);
+ down_read(&sbi->cp_rwsem);
}
-static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t)
+static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- mutex_unlock(&sbi->fs_lock[t]);
+ up_read(&sbi->cp_rwsem);
+}
+
+static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
+{
+ f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
+}
+
+static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
+{
+ up_write(&sbi->cp_rwsem);
}
/*
* Check whether the given nid is within node id range.
*/
-static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
+static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
- BUG_ON((nid >= NM_I(sbi)->max_nid));
+ if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+ return -EINVAL;
+ if (unlikely(nid >= NM_I(sbi)->max_nid))
+ return -EINVAL;
+ return 0;
}
#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
@@ -524,9 +656,14 @@ static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
{
if (F2FS_I(inode)->i_xattr_nid)
- return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
else
- return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+}
+
+static inline bool f2fs_has_xattr_block(unsigned int ofs)
+{
+ return ofs == XATTR_NODE_OFFSET;
}
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
@@ -537,7 +674,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
valid_block_count =
sbi->total_valid_block_count + (block_t)count;
- if (valid_block_count > sbi->user_block_count) {
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
@@ -548,17 +685,16 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
return true;
}
-static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
+static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
blkcnt_t count)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_block_count < (block_t) count);
- BUG_ON(inode->i_blocks < count);
+ f2fs_bug_on(sbi->total_valid_block_count < (block_t) count);
+ f2fs_bug_on(inode->i_blocks < count);
inode->i_blocks -= count;
sbi->total_valid_block_count -= (block_t)count;
spin_unlock(&sbi->stat_lock);
- return 0;
}
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -569,6 +705,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_dents(struct inode *inode)
{
+ inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_inc(&F2FS_I(inode)->dirty_dents);
}
@@ -579,6 +716,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_dents(struct inode *inode)
{
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_dec(&F2FS_I(inode)->dirty_dents);
}
@@ -587,6 +728,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
return atomic_read(&sbi->nr_pages[count_type]);
}
+static inline int get_dirty_dents(struct inode *inode)
+{
+ return atomic_read(&F2FS_I(inode)->dirty_dents);
+}
+
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -597,11 +743,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
{
- block_t ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_block_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_block_count;
}
static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -620,16 +762,25 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- int offset = (flag == NAT_BITMAP) ?
+ int offset;
+
+ if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+ if (flag == NAT_BITMAP)
+ return &ckpt->sit_nat_version_bitmap;
+ else
+ return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ } else {
+ offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ return &ckpt->sit_nat_version_bitmap + offset;
+ }
}
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
{
block_t start_addr;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
+ unsigned long long ckpt_version = cur_cp_version(ckpt);
start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
@@ -649,96 +800,85 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
}
static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode,
- unsigned int count)
+ struct inode *inode)
{
block_t valid_block_count;
unsigned int valid_node_count;
spin_lock(&sbi->stat_lock);
- valid_block_count = sbi->total_valid_block_count + (block_t)count;
- sbi->alloc_valid_block_count += (block_t)count;
- valid_node_count = sbi->total_valid_node_count + count;
-
- if (valid_block_count > sbi->user_block_count) {
+ valid_block_count = sbi->total_valid_block_count + 1;
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
- if (valid_node_count > sbi->total_node_count) {
+ valid_node_count = sbi->total_valid_node_count + 1;
+ if (unlikely(valid_node_count > sbi->total_node_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
if (inode)
- inode->i_blocks += count;
- sbi->total_valid_node_count = valid_node_count;
- sbi->total_valid_block_count = valid_block_count;
+ inode->i_blocks++;
+
+ sbi->alloc_valid_block_count++;
+ sbi->total_valid_node_count++;
+ sbi->total_valid_block_count++;
spin_unlock(&sbi->stat_lock);
return true;
}
static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode,
- unsigned int count)
+ struct inode *inode)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_block_count < count);
- BUG_ON(sbi->total_valid_node_count < count);
- BUG_ON(inode->i_blocks < count);
+ f2fs_bug_on(!sbi->total_valid_block_count);
+ f2fs_bug_on(!sbi->total_valid_node_count);
+ f2fs_bug_on(!inode->i_blocks);
- inode->i_blocks -= count;
- sbi->total_valid_node_count -= count;
- sbi->total_valid_block_count -= (block_t)count;
+ inode->i_blocks--;
+ sbi->total_valid_node_count--;
+ sbi->total_valid_block_count--;
spin_unlock(&sbi->stat_lock);
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_node_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_node_count;
}
static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
+ f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);
sbi->total_valid_inode_count++;
spin_unlock(&sbi->stat_lock);
}
-static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
+static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- BUG_ON(!sbi->total_valid_inode_count);
+ f2fs_bug_on(!sbi->total_valid_inode_count);
sbi->total_valid_inode_count--;
spin_unlock(&sbi->stat_lock);
- return 0;
}
static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_inode_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_inode_count;
}
static inline void f2fs_put_page(struct page *page, int unlock)
{
- if (!page || IS_ERR(page))
+ if (!page)
return;
if (unlock) {
- BUG_ON(!PageLocked(page));
+ f2fs_bug_on(!PageLocked(page));
unlock_page(page);
}
page_cache_release(page);
@@ -755,16 +895,30 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
}
static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
- size_t size, void (*ctor)(void *))
+ size_t size)
{
- return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
+ return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
+}
+
+static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
+{
+ void *entry;
+retry:
+ entry = kmem_cache_alloc(cachep, flags);
+ if (!entry) {
+ cond_resched();
+ goto retry;
+ }
+
+ return entry;
}
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
static inline bool IS_INODE(struct page *page)
{
- struct f2fs_node *p = (struct f2fs_node *)page_address(page);
+ struct f2fs_node *p = F2FS_NODE(page);
return RAW_IS_INODE(p);
}
@@ -778,7 +932,7 @@ static inline block_t datablock_addr(struct page *node_page,
{
struct f2fs_node *raw_node;
__le32 *addr_array;
- raw_node = (struct f2fs_node *)page_address(node_page);
+ raw_node = F2FS_NODE(node_page);
addr_array = blkaddr_in_node(raw_node);
return le32_to_cpu(addr_array[offset]);
}
@@ -819,10 +973,16 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
- FI_NEED_CP, /* need to do checkpoint during fsync */
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
+ FI_UPDATE_DIR, /* should update inode block for consistency */
+ FI_DELAY_IPUT, /* used for the recovery */
+ FI_NO_EXTENT, /* not to use the extent cache */
+ FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -855,14 +1015,96 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
return 0;
}
+static inline void get_inline_info(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ if (ri->i_inline & F2FS_INLINE_XATTR)
+ set_inode_flag(fi, FI_INLINE_XATTR);
+ if (ri->i_inline & F2FS_INLINE_DATA)
+ set_inode_flag(fi, FI_INLINE_DATA);
+}
+
+static inline void set_raw_inline(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ ri->i_inline = 0;
+
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ ri->i_inline |= F2FS_INLINE_XATTR;
+ if (is_inode_flag_set(fi, FI_INLINE_DATA))
+ ri->i_inline |= F2FS_INLINE_DATA;
+}
+
+static inline int f2fs_has_inline_xattr(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
+}
+
+static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+{
+ if (f2fs_has_inline_xattr(&fi->vfs_inode))
+ return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
+ return DEF_ADDRS_PER_INODE;
+}
+
+static inline void *inline_xattr_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
+ F2FS_INLINE_XATTR_ADDRS]);
+}
+
+static inline int inline_xattr_size(struct inode *inode)
+{
+ if (f2fs_has_inline_xattr(inode))
+ return F2FS_INLINE_XATTR_ADDRS << 2;
+ else
+ return 0;
+}
+
+static inline int f2fs_has_inline_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
+}
+
+static inline void *inline_data_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
+static inline int f2fs_readonly(struct super_block *sb)
+{
+ return sb->s_flags & MS_RDONLY;
+}
+
+static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
+{
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+}
+
+#define get_inode_mode(i) \
+ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/* get offset of first page in next direct node */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
+ ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
+ (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
/*
* file.c
*/
int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *);
+int truncate_blocks(struct inode *, u64);
void f2fs_truncate(struct inode *);
+int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *);
int truncate_hole(struct inode *, pgoff_t, pgoff_t);
+int truncate_data_blocks_range(struct dnode_of_data *, int);
long f2fs_ioctl(struct file *, unsigned int, unsigned long);
long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
@@ -871,7 +1113,9 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
*/
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
+int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
+void update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
@@ -889,7 +1133,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
-void init_dent_inode(const struct qstr *, struct page *);
+int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
@@ -919,13 +1163,18 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
struct dnode_of_data;
struct node_info;
+bool available_free_memory(struct f2fs_sb_info *, int);
int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
+void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
-int remove_inode_page(struct inode *);
-int new_inode_page(struct inode *, const struct qstr *);
-struct page *new_node_page(struct dnode_of_data *, unsigned int);
+int truncate_xattr_node(struct inode *, struct page *);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
+void remove_inode_page(struct inode *);
+struct page *new_inode_page(struct inode *, const struct qstr *);
+struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_node_page_ra(struct page *, int);
@@ -936,6 +1185,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
struct f2fs_summary_block *);
@@ -949,45 +1199,55 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *);
+int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
-void locate_dirty_segment(struct f2fs_sb_info *, unsigned int);
+void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
-struct bio *f2fs_bio_alloc(struct block_device *, int);
-void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
void write_meta_page(struct f2fs_sb_info *, struct page *);
-void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
- block_t, block_t *);
-void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
- block_t, block_t *);
-void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
+void write_node_page(struct f2fs_sb_info *, struct page *,
+ struct f2fs_io_info *, unsigned int, block_t, block_t *);
+void write_data_page(struct page *, struct dnode_of_data *, block_t *,
+ struct f2fs_io_info *);
+void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void rewrite_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+ block_t, block_t *, struct f2fs_summary *, int);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type);
void write_data_summaries(struct f2fs_sb_info *, block_t);
void write_node_summaries(struct f2fs_sb_info *, block_t);
int lookup_journal_in_cursum(struct f2fs_summary_block *,
int, unsigned int, int);
void flush_sit_entries(struct f2fs_sb_info *);
int build_segment_manager(struct f2fs_sb_info *);
-void reset_victim_segmap(struct f2fs_sb_info *);
void destroy_segment_manager(struct f2fs_sb_info *);
+int __init create_segment_manager_caches(void);
+void destroy_segment_manager_caches(void);
/*
* checkpoint.c
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-int check_orphan_space(struct f2fs_sb_info *);
+int acquire_orphan_inode(struct f2fs_sb_info *);
+void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
-int recover_orphan_inodes(struct f2fs_sb_info *);
+void recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void set_dirty_dir_page(struct inode *, struct page *);
+void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
@@ -998,20 +1258,25 @@ void destroy_checkpoint_caches(void);
/*
* data.c
*/
+void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
+void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
+ struct f2fs_io_info *);
int reserve_new_block(struct dnode_of_data *);
+int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void update_extent_cache(block_t, struct dnode_of_data *);
-struct page *find_data_page(struct inode *, pgoff_t);
+struct page *find_data_page(struct inode *, pgoff_t, bool);
struct page *get_lock_data_page(struct inode *, pgoff_t);
-struct page *get_new_data_page(struct inode *, pgoff_t, bool);
-int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
-int do_write_data_page(struct page *);
+struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
+int do_write_data_page(struct page *, struct f2fs_io_info *);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
/*
* gc.c
*/
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int);
+block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
int f2fs_gc(struct f2fs_sb_info *);
void build_gc_manager(struct f2fs_sb_info *);
int __init create_gc_caches(void);
@@ -1020,7 +1285,7 @@ void destroy_gc_caches(void);
/*
* recovery.c
*/
-void recover_fsync_data(struct f2fs_sb_info *);
+int recover_fsync_data(struct f2fs_sb_info *);
bool space_for_roll_forward(struct f2fs_sb_info *);
/*
@@ -1037,13 +1302,13 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int total_count, utilization;
- int bg_gc;
+ int bg_gc, inline_inode;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
- int prefree_count, call_count;
+ int prefree_count, call_count, cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int tot_blks, data_blks, node_blks;
int curseg[NR_CURSEG_TYPE];
@@ -1055,11 +1320,37 @@ struct f2fs_stat_info {
unsigned base_mem, cache_mem;
};
-#define stat_inc_call_count(si) ((si)->call_count++)
+static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_stat_info *)sbi->stat_info;
+}
+
+#define stat_inc_cp_count(si) ((si)->cp_count++)
+#define stat_inc_call_count(si) ((si)->call_count++)
+#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
+#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
+#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
+#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
+#define stat_inc_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ ((F2FS_SB(inode->i_sb))->inline_inode++); \
+ } while (0)
+#define stat_dec_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ ((F2FS_SB(inode->i_sb))->inline_inode--); \
+ } while (0)
+
+#define stat_inc_seg_type(sbi, curseg) \
+ ((sbi)->segment_count[(curseg)->alloc_type]++)
+#define stat_inc_block_count(sbi, curseg) \
+ ((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_seg_count(sbi, type) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
(si)->tot_segs++; \
if (type == SUM_TYPE_DATA) \
si->data_segs++; \
@@ -1072,14 +1363,14 @@ struct f2fs_stat_info {
#define stat_inc_data_blk_count(sbi, blks) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->data_blks += (blks); \
} while (0)
#define stat_inc_node_blk_count(sbi, blks) \
do { \
- struct f2fs_stat_info *si = sbi->stat_info; \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->node_blks += (blks); \
} while (0)
@@ -1089,7 +1380,17 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
+#define stat_inc_cp_count(si)
#define stat_inc_call_count(si)
+#define stat_inc_bggc_count(si)
+#define stat_inc_dirty_dir(sbi)
+#define stat_dec_dirty_dir(sbi)
+#define stat_inc_total_hit(sb)
+#define stat_inc_read_hit(sb)
+#define stat_inc_inline_inode(inode)
+#define stat_dec_inline_inode(inode)
+#define stat_inc_seg_type(sbi, curseg)
+#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
#define stat_inc_tot_blk_count(si, blks)
#define stat_inc_data_blk_count(si, blks)
@@ -1110,4 +1411,14 @@ extern const struct address_space_operations f2fs_meta_aops;
extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
+
+/*
+ * inline.c
+ */
+bool f2fs_may_inline(struct inode *);
+int f2fs_read_inline_data(struct inode *, struct page *);
+int f2fs_convert_inline_data(struct inode *, pgoff_t);
+int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
+void truncate_inline_data(struct inode *, u64);
+int recover_inline_data(struct inode *, struct page *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 958a46da19a..7d8b9627509 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -13,17 +13,20 @@
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/blkdev.h>
#include <linux/falloc.h>
#include <linux/types.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
+#include <linux/pagevec.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "acl.h"
+#include <trace/events/f2fs.h>
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
@@ -31,7 +34,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- block_t old_blk_addr;
struct dnode_of_data dn;
int err;
@@ -39,34 +41,19 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
sb_start_pagefault(inode->i_sb);
- mutex_lock_op(sbi, DATA_NEW);
-
/* block allocation */
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, page->index, 0);
- if (err) {
- mutex_unlock_op(sbi, DATA_NEW);
+ err = f2fs_reserve_block(&dn, page->index);
+ f2fs_unlock_op(sbi);
+ if (err)
goto out;
- }
-
- old_blk_addr = dn.data_blkaddr;
-
- if (old_blk_addr == NULL_ADDR) {
- err = reserve_new_block(&dn);
- if (err) {
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, DATA_NEW);
- goto out;
- }
- }
- f2fs_put_dnode(&dn);
-
- mutex_unlock_op(sbi, DATA_NEW);
+ file_update_time(vma->vm_file);
lock_page(page);
- if (page->mapping != inode->i_mapping ||
- page_offset(page) >= i_size_read(inode) ||
- !PageUptodate(page)) {
+ if (unlikely(page->mapping != inode->i_mapping ||
+ page_offset(page) > i_size_read(inode) ||
+ !PageUptodate(page))) {
unlock_page(page);
err = -EFAULT;
goto out;
@@ -76,10 +63,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
* check to see if the page is mapped already (no holes)
*/
if (PageMappedToDisk(page))
- goto out;
-
- /* fill the page */
- wait_on_page_writeback(page);
+ goto mapped;
/* page is wholly or partially inside EOF */
if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
@@ -90,7 +74,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
set_page_dirty(page);
SetPageUptodate(page);
- file_update_time(vma->vm_file);
+ trace_f2fs_vm_page_mkwrite(page, DATA);
+mapped:
+ /* fill the page */
+ f2fs_wait_on_page_writeback(page, DATA);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err);
@@ -98,32 +85,36 @@ out:
static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
-static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
+static int get_parent_ino(struct inode *inode, nid_t *pino)
{
struct dentry *dentry;
- nid_t pino;
inode = igrab(inode);
dentry = d_find_any_alias(inode);
- if (!dentry) {
- iput(inode);
+ iput(inode);
+ if (!dentry)
+ return 0;
+
+ if (update_dent_inode(inode, &dentry->d_name)) {
+ dput(dentry);
return 0;
}
- pino = dentry->d_parent->d_inode->i_ino;
+
+ *pino = parent_ino(dentry);
dput(dentry);
- iput(inode);
- return !is_checkpointed_node(sbi, pino);
+ return 1;
}
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- unsigned long long cur_version;
int ret = 0;
bool need_cp = false;
struct writeback_control wbc = {
@@ -132,58 +123,204 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
.for_reclaim = 0,
};
- if (inode->i_sb->s_flags & MS_RDONLY)
+ if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
+ trace_f2fs_sync_file_enter(inode);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
+ if (ret) {
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
+ }
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
- mutex_lock(&inode->i_mutex);
-
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- goto out;
-
- mutex_lock(&sbi->cp_mutex);
- cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
- mutex_unlock(&sbi->cp_mutex);
-
- if (F2FS_I(inode)->data_version != cur_version &&
- !(inode->i_state & I_DIRTY))
- goto out;
- F2FS_I(inode)->data_version--;
+ down_read(&fi->i_sem);
+ /*
+ * Both of fdatasync() and fsync() are able to be recovered from
+ * sudden-power-off.
+ */
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
- else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
+ else if (file_wrong_pino(inode))
need_cp = true;
else if (!space_for_roll_forward(sbi))
need_cp = true;
- else if (need_to_sync_dir(sbi, inode))
+ else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
+ else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
+ need_cp = true;
+
+ up_read(&fi->i_sem);
if (need_cp) {
+ nid_t pino;
+
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
- clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
+
+ down_write(&fi->i_sem);
+ F2FS_I(inode)->xattr_ver = 0;
+ if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
+ get_parent_ino(inode, &pino)) {
+ F2FS_I(inode)->i_pino = pino;
+ file_got_pino(inode);
+ up_write(&fi->i_sem);
+ mark_inode_dirty_sync(inode);
+ ret = f2fs_write_inode(inode, NULL);
+ if (ret)
+ goto out;
+ } else {
+ up_write(&fi->i_sem);
+ }
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
+ if (fsync_mark_done(sbi, inode->i_ino))
+ goto out;
+ mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
}
- filemap_fdatawait_range(sbi->node_inode->i_mapping,
- 0, LONG_MAX);
+ ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
+ if (ret)
+ goto out;
+ ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
- mutex_unlock(&inode->i_mutex);
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ pgoff_t pgofs, int whence)
+{
+ struct pagevec pvec;
+ int nr_pages;
+
+ if (whence != SEEK_DATA)
+ return 0;
+
+ /* find first dirty page index */
+ pagevec_init(&pvec, 0);
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ pagevec_release(&pvec);
+ return pgofs;
+}
+
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+ (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ return true;
+ break;
+ case SEEK_HOLE:
+ if (blkaddr == NULL_ADDR)
+ return true;
+ break;
+ }
+ return false;
+}
+
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+ struct dnode_of_data dn;
+ pgoff_t pgofs, end_offset, dirty;
+ loff_t data_ofs = offset;
+ loff_t isize;
+ int err = 0;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ goto fail;
+
+ /* handle inline data case */
+ if (f2fs_has_inline_data(inode)) {
+ if (whence == SEEK_HOLE)
+ data_ofs = isize;
+ goto found;
+ }
+
+ pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+ dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+ for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ if (err && err != -ENOENT) {
+ goto fail;
+ } else if (err == -ENOENT) {
+ /* direct node is not exist */
+ if (whence == SEEK_DATA) {
+ pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+ F2FS_I(inode));
+ continue;
+ } else {
+ goto found;
+ }
+ }
+
+ end_offset = IS_INODE(dn.node_page) ?
+ ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+
+ /* find data/hole in dnode block */
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, pgofs++,
+ data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ block_t blkaddr;
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+ }
+ f2fs_put_dnode(&dn);
+ }
+
+ if (whence == SEEK_DATA)
+ goto fail;
+found:
+ if (whence == SEEK_HOLE && data_ofs > isize)
+ data_ofs = isize;
+ mutex_unlock(&inode->i_mutex);
+ return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+}
+
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ maxbytes, i_size_read(inode));
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ return f2fs_seek_block(file, offset, whence);
+ }
+
+ return -EINVAL;
+}
+
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
@@ -191,31 +328,34 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
int nr_free = 0, ofs = dn->ofs_in_node;
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct f2fs_node *raw_node;
__le32 *addr;
- raw_node = page_address(dn->node_page);
+ raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + ofs;
- for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
+ for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
if (blkaddr == NULL_ADDR)
continue;
update_extent_cache(NULL_ADDR, dn);
invalidate_blocks(sbi, blkaddr);
- dec_valid_block_count(sbi, dn->inode, 1);
nr_free++;
}
if (nr_free) {
+ dec_valid_block_count(sbi, dn->inode, nr_free);
set_page_dirty(dn->node_page);
sync_inode_page(dn);
}
dn->ofs_in_node = ofs;
+
+ trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
+ dn->ofs_in_node, nr_free);
return nr_free;
}
@@ -229,50 +369,60 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
+ if (f2fs_has_inline_data(inode))
+ return truncate_inline_data(inode, from);
+
if (!offset)
return;
- page = find_data_page(inode, from >> PAGE_CACHE_SHIFT);
+ page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
if (IS_ERR(page))
return;
lock_page(page);
- wait_on_page_writeback(page);
+ if (unlikely(page->mapping != inode->i_mapping)) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
-static int truncate_blocks(struct inode *inode, u64 from)
+int truncate_blocks(struct inode *inode, u64 from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blocksize = inode->i_sb->s_blocksize;
struct dnode_of_data dn;
pgoff_t free_from;
- int count = 0;
- int err;
+ int count = 0, err = 0;
+
+ trace_f2fs_truncate_blocks_enter(inode, from);
+
+ if (f2fs_has_inline_data(inode))
+ goto done;
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
- mutex_lock_op(sbi, DATA_TRUNC);
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, free_from, RDONLY_NODE);
+ err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- mutex_unlock_op(sbi, DATA_TRUNC);
+ f2fs_unlock_op(sbi);
+ trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
- if (IS_INODE(dn.node_page))
- count = ADDRS_PER_INODE;
- else
- count = ADDRS_PER_BLOCK;
+ count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
count -= dn.ofs_in_node;
- BUG_ON(count < 0);
+ f2fs_bug_on(count < 0);
+
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
truncate_data_blocks_range(&dn, count);
free_from += count;
@@ -281,11 +431,12 @@ static int truncate_blocks(struct inode *inode, u64 from)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
- mutex_unlock_op(sbi, DATA_TRUNC);
-
+ f2fs_unlock_op(sbi);
+done:
/* lastly zero out the first data page */
truncate_partial_data_page(inode, from);
+ trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
@@ -295,13 +446,15 @@ void f2fs_truncate(struct inode *inode)
S_ISLNK(inode->i_mode)))
return;
+ trace_f2fs_truncate(inode);
+
if (!truncate_blocks(inode, i_size_read(inode))) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
}
-static int f2fs_getattr(struct vfsmount *mnt,
+int f2fs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
@@ -353,6 +506,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
+ err = f2fs_convert_inline_data(inode, attr->ia_size);
+ if (err)
+ return err;
+
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
f2fs_balance_fs(F2FS_SB(inode->i_sb));
@@ -361,7 +518,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
__setattr_copy(inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = f2fs_acl_chmod(inode);
+ err = posix_acl_chmod(inode, get_inode_mode(inode));
if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
clear_inode_flag(fi, FI_ACL_MODE);
@@ -376,12 +533,14 @@ const struct inode_operations f2fs_file_inode_operations = {
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
#endif
+ .fiemap = f2fs_fiemap,
};
static void fill_zero(struct inode *inode, pgoff_t index,
@@ -395,12 +554,12 @@ static void fill_zero(struct inode *inode, pgoff_t index,
f2fs_balance_fs(sbi);
- mutex_lock_op(sbi, DATA_NEW);
- page = get_new_data_page(inode, index, false);
- mutex_unlock_op(sbi, DATA_NEW);
+ f2fs_lock_op(sbi);
+ page = get_new_data_page(inode, NULL, index, false);
+ f2fs_unlock_op(sbi);
if (!IS_ERR(page)) {
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -414,15 +573,10 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
for (index = pg_start; index < pg_end; index++) {
struct dnode_of_data dn;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- f2fs_balance_fs(sbi);
-
- mutex_lock_op(sbi, DATA_TRUNC);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, RDONLY_NODE);
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err) {
- mutex_unlock_op(sbi, DATA_TRUNC);
if (err == -ENOENT)
continue;
return err;
@@ -431,17 +585,20 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
if (dn.data_blkaddr != NULL_ADDR)
truncate_data_blocks_range(&dn, 1);
f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, DATA_TRUNC);
}
return 0;
}
-static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
+static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
int ret = 0;
+ ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
+ if (ret)
+ return ret;
+
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -461,21 +618,21 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
if (pg_start < pg_end) {
struct address_space *mapping = inode->i_mapping;
loff_t blk_start, blk_end;
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+ f2fs_balance_fs(sbi);
blk_start = pg_start << PAGE_CACHE_SHIFT;
blk_end = pg_end << PAGE_CACHE_SHIFT;
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
+
+ f2fs_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end);
+ f2fs_unlock_op(sbi);
}
}
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- i_size_read(inode) <= (offset + len)) {
- i_size_write(inode, offset);
- mark_inode_dirty(inode);
- }
-
return ret;
}
@@ -492,36 +649,29 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
+ ret = f2fs_convert_inline_data(inode, offset + len);
+ if (ret)
+ return ret;
+
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
off_start = offset & (PAGE_CACHE_SIZE - 1);
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ f2fs_lock_op(sbi);
+
for (index = pg_start; index <= pg_end; index++) {
struct dnode_of_data dn;
- mutex_lock_op(sbi, DATA_NEW);
+ if (index == pg_end && !off_end)
+ goto noalloc;
set_new_dnode(&dn, inode, NULL, NULL, 0);
- ret = get_dnode_of_data(&dn, index, 0);
- if (ret) {
- mutex_unlock_op(sbi, DATA_NEW);
+ ret = f2fs_reserve_block(&dn, index);
+ if (ret)
break;
- }
-
- if (dn.data_blkaddr == NULL_ADDR) {
- ret = reserve_new_block(&dn);
- if (ret) {
- f2fs_put_dnode(&dn);
- mutex_unlock_op(sbi, DATA_NEW);
- break;
- }
- }
- f2fs_put_dnode(&dn);
-
- mutex_unlock_op(sbi, DATA_NEW);
-
+noalloc:
if (pg_start == pg_end)
new_size = offset + len;
else if (index == pg_start && off_start)
@@ -536,7 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
i_size_read(inode) < new_size) {
i_size_write(inode, new_size);
mark_inode_dirty(inode);
+ update_inode_page(inode);
}
+ f2fs_unlock_op(sbi);
return ret;
}
@@ -550,8 +702,10 @@ static long f2fs_fallocate(struct file *file, int mode,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
+ mutex_lock(&inode->i_mutex);
+
if (mode & FALLOC_FL_PUNCH_HOLE)
- ret = punch_hole(inode, offset, len, mode);
+ ret = punch_hole(inode, offset, len);
else
ret = expand_inode_data(inode, offset, len, mode);
@@ -559,6 +713,10 @@ static long f2fs_fallocate(struct file *file, int mode,
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
+
+ mutex_unlock(&inode->i_mutex);
+
+ trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
}
@@ -583,14 +741,14 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
int ret;
switch (cmd) {
- case FS_IOC_GETFLAGS:
+ case F2FS_IOC_GETFLAGS:
flags = fi->i_flags & FS_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
- case FS_IOC_SETFLAGS:
+ case F2FS_IOC_SETFLAGS:
{
unsigned int oldflags;
- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -627,7 +785,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
out:
- mnt_drop_write(filp->f_path.mnt);
+ mnt_drop_write_file(filp);
return ret;
}
default:
@@ -653,11 +811,11 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
#endif
const struct file_operations f2fs_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .llseek = f2fs_llseek,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.open = generic_file_open,
.mmap = f2fs_file_mmap,
.fsync = f2fs_sync_file,
@@ -667,5 +825,5 @@ const struct file_operations f2fs_file_operations = {
.compat_ioctl = f2fs_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 94b8a0c4845..b90dbe55403 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -11,7 +11,6 @@
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
-#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
@@ -23,16 +22,18 @@
#include "node.h"
#include "segment.h"
#include "gc.h"
+#include <trace/events/f2fs.h>
static struct kmem_cache *winode_slab;
static int gc_thread_func(void *data)
{
struct f2fs_sb_info *sbi = data;
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
long wait_ms;
- wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+ wait_ms = gc_th->min_sleep_time;
do {
if (try_to_freeze())
@@ -45,7 +46,7 @@ static int gc_thread_func(void *data)
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
- wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
continue;
}
@@ -66,23 +67,24 @@ static int gc_thread_func(void *data)
continue;
if (!is_idle(sbi)) {
- wait_ms = increase_sleep_time(wait_ms);
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
mutex_unlock(&sbi->gc_mutex);
continue;
}
if (has_enough_invalid_blocks(sbi))
- wait_ms = decrease_sleep_time(wait_ms);
+ wait_ms = decrease_sleep_time(gc_th, wait_ms);
else
- wait_ms = increase_sleep_time(wait_ms);
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
- sbi->bg_gc++;
+ stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi))
- wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
- else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
- wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+ wait_ms = gc_th->no_gc_sleep_time;
+
+ /* balancing f2fs's metadata periodically */
+ f2fs_balance_fs_bg(sbi);
} while (!kthread_should_stop());
return 0;
@@ -92,23 +94,33 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th;
dev_t dev = sbi->sb->s_bdev->bd_dev;
+ int err = 0;
if (!test_opt(sbi, BG_GC))
- return 0;
+ goto out;
gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
- if (!gc_th)
- return -ENOMEM;
+ if (!gc_th) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+
+ gc_th->gc_idle = 0;
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
+ err = PTR_ERR(gc_th->f2fs_gc_task);
kfree(gc_th);
sbi->gc_thread = NULL;
- return -ENOMEM;
}
- return 0;
+out:
+ return err;
}
void stop_gc_thread(struct f2fs_sb_info *sbi)
@@ -121,9 +133,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi)
sbi->gc_thread = NULL;
}
-static int select_gc_type(int gc_type)
+static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
- return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+
+ if (gc_th && gc_th->gc_idle) {
+ if (gc_th->gc_idle == 1)
+ gc_mode = GC_CB;
+ else if (gc_th->gc_idle == 2)
+ gc_mode = GC_GREEDY;
+ }
+ return gc_mode;
}
static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
@@ -131,15 +151,21 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- if (p->alloc_mode) {
+ if (p->alloc_mode == SSR) {
p->gc_mode = GC_GREEDY;
p->dirty_segmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
} else {
- p->gc_mode = select_gc_type(gc_type);
+ p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
+ p->max_search = dirty_i->nr_dirty[DIRTY];
p->ofs_unit = sbi->segs_per_sec;
}
+
+ if (p->max_search > sbi->max_victim_search)
+ p->max_search = sbi->max_victim_search;
+
p->offset = sbi->last_victim[p->gc_mode];
}
@@ -160,18 +186,21 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno;
+ unsigned int hint = 0;
+ unsigned int secno;
/*
* If the gc_type is FG_GC, we can select victim segments
* selected by background GC before.
* Those segments guarantee they have small valid blocks.
*/
- segno = find_next_bit(dirty_i->victim_segmap[BG_GC],
- TOTAL_SEGS(sbi), 0);
- if (segno < TOTAL_SEGS(sbi)) {
- clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
- return segno;
+next:
+ secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
+ if (secno < TOTAL_SECS(sbi)) {
+ if (sec_usage_check(sbi, secno))
+ goto next;
+ clear_bit(secno, dirty_i->victim_secmap);
+ return secno * sbi->segs_per_sec;
}
return NULL_SEGNO;
}
@@ -208,8 +237,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
-static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
- struct victim_sel_policy *p)
+static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct victim_sel_policy *p)
{
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
@@ -222,7 +251,7 @@ static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
}
/*
- * This function is called from two pathes.
+ * This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
* When it is called during GC, it just gets a victim segment
* and it does not remove it from dirty seglist.
@@ -234,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct victim_sel_policy p;
- unsigned int segno;
+ unsigned int secno, max_cost;
int nsearched = 0;
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
- p.min_cost = get_max_cost(sbi, &p);
+ p.min_cost = max_cost = get_max_cost(sbi, &p);
mutex_lock(&dirty_i->seglist_lock);
@@ -253,6 +282,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
while (1) {
unsigned long cost;
+ unsigned int segno;
segno = find_next_bit(p.dirty_segmap,
TOTAL_SEGS(sbi), p.offset);
@@ -264,14 +294,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
break;
}
- p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
- if (test_bit(segno, dirty_i->victim_segmap[FG_GC]))
- continue;
- if (gc_type == BG_GC &&
- test_bit(segno, dirty_i->victim_segmap[BG_GC]))
+ p.offset = segno + p.ofs_unit;
+ if (p.ofs_unit > 1)
+ p.offset -= segno % p.ofs_unit;
+
+ secno = GET_SECNO(sbi, segno);
+
+ if (sec_usage_check(sbi, secno))
continue;
- if (IS_CURSEC(sbi, GET_SECNO(sbi, segno)))
+ if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
continue;
cost = get_gc_cost(sbi, segno, &p);
@@ -279,25 +311,29 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
- }
-
- if (cost == get_max_cost(sbi, &p))
+ } else if (unlikely(cost == max_cost)) {
continue;
+ }
- if (nsearched++ >= MAX_VICTIM_SEARCH) {
+ if (nsearched++ >= p.max_search) {
sbi->last_victim[p.gc_mode] = segno;
break;
}
}
-got_it:
if (p.min_segno != NULL_SEGNO) {
- *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_it:
if (p.alloc_mode == LFS) {
- int i;
- for (i = 0; i < p.ofs_unit; i++)
- set_bit(*result + i,
- dirty_i->victim_segmap[gc_type]);
+ secno = GET_SECNO(sbi, p.min_segno);
+ if (gc_type == FG_GC)
+ sbi->cur_victim_sec = secno;
+ else
+ set_bit(secno, dirty_i->victim_secmap);
}
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+
+ trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
+ sbi->cur_victim_sec,
+ prefree_segments(sbi), free_segments(sbi));
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -310,35 +346,24 @@ static const struct victim_selection default_v_ops = {
static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
{
- struct list_head *this;
struct inode_entry *ie;
- list_for_each(this, ilist) {
- ie = list_entry(this, struct inode_entry, list);
+ list_for_each_entry(ie, ilist, list)
if (ie->inode->i_ino == ino)
return ie->inode;
- }
return NULL;
}
static void add_gc_inode(struct inode *inode, struct list_head *ilist)
{
- struct list_head *this;
- struct inode_entry *new_ie, *ie;
+ struct inode_entry *new_ie;
- list_for_each(this, ilist) {
- ie = list_entry(this, struct inode_entry, list);
- if (ie->inode == inode) {
- iput(inode);
- return;
- }
- }
-repeat:
- new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
- if (!new_ie) {
- cond_resched();
- goto repeat;
+ if (inode == find_gc_inode(inode->i_ino, ilist)) {
+ iput(inode);
+ return;
}
+
+ new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
new_ie->inode = inode;
list_add_tail(&new_ie->list, ilist);
}
@@ -381,6 +406,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
next_step:
entry = sum;
+
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
@@ -401,11 +427,17 @@ next_step:
continue;
/* set page dirty and write it */
- if (!PageWriteback(node_page))
+ if (gc_type == FG_GC) {
+ f2fs_wait_on_page_writeback(node_page, NODE);
set_page_dirty(node_page);
+ } else {
+ if (!PageWriteback(node_page))
+ set_page_dirty(node_page);
+ }
f2fs_put_page(node_page, 1);
stat_inc_node_blk_count(sbi, 1);
}
+
if (initial) {
initial = false;
goto next_step;
@@ -418,6 +450,13 @@ next_step:
.for_reclaim = 0,
};
sync_node_pages(sbi, 0, &wbc);
+
+ /*
+ * In the case of FG_GC, it'd be better to reclaim this victim
+ * completely.
+ */
+ if (get_valid_blocks(sbi, segno, 1) != 0)
+ goto next_step;
}
}
@@ -428,7 +467,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
-block_t start_bidx_of_node(unsigned int node_ofs)
+block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@@ -445,7 +484,7 @@ block_t start_bidx_of_node(unsigned int node_ofs)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
}
static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -481,29 +520,23 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static void move_data_page(struct inode *inode, struct page *page, int gc_type)
{
- if (page->mapping != inode->i_mapping)
- goto out;
-
- if (inode != page->mapping->host)
- goto out;
-
- if (PageWriteback(page))
- goto out;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC,
+ };
if (gc_type == BG_GC) {
+ if (PageWriteback(page))
+ goto out;
set_page_dirty(page);
set_cold_data(page);
} else {
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- mutex_lock_op(sbi, DATA_WRITE);
- if (clear_page_dirty_for_io(page) &&
- S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ if (clear_page_dirty_for_io(page))
inode_dec_dirty_dents(inode);
- }
set_cold_data(page);
- do_write_data_page(page);
- mutex_unlock_op(sbi, DATA_WRITE);
+ do_write_data_page(page, &fio);
clear_cold_data(page);
}
out:
@@ -530,6 +563,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
next_step:
entry = sum;
+
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
struct page *data_page;
struct inode *inode;
@@ -558,7 +592,6 @@ next_step:
continue;
}
- start_bidx = start_bidx_of_node(nofs);
ofs_in_node = le16_to_cpu(entry->ofs_in_node);
if (phase == 2) {
@@ -566,8 +599,10 @@ next_step:
if (IS_ERR(inode))
continue;
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+
data_page = find_data_page(inode,
- start_bidx + ofs_in_node);
+ start_bidx + ofs_in_node, false);
if (IS_ERR(data_page))
goto next_iput;
@@ -576,6 +611,8 @@ next_step:
} else {
inode = find_gc_inode(dni.ino, ilist);
if (inode) {
+ start_bidx = start_bidx_of_node(nofs,
+ F2FS_I(inode));
data_page = get_lock_data_page(inode,
start_bidx + ofs_in_node);
if (IS_ERR(data_page))
@@ -588,11 +625,22 @@ next_step:
next_iput:
iput(inode);
}
+
if (++phase < 4)
goto next_step;
- if (gc_type == FG_GC)
- f2fs_submit_bio(sbi, DATA, true);
+ if (gc_type == FG_GC) {
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+
+ /*
+ * In the case of FG_GC, it'd be better to reclaim this victim
+ * completely.
+ */
+ if (get_valid_blocks(sbi, segno, 1) != 0) {
+ phase = 2;
+ goto next_step;
+ }
+ }
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -611,18 +659,13 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
{
struct page *sum_page;
struct f2fs_summary_block *sum;
+ struct blk_plug plug;
/* read segment summary of victim */
sum_page = get_sum_page(sbi, segno);
- if (IS_ERR(sum_page))
- return;
- /*
- * CP needs to lock sum_page. In this time, we don't need
- * to lock this page, because this summary page is not gone anywhere.
- * Also, this page is not gonna be updated before GC is done.
- */
- unlock_page(sum_page);
+ blk_start_plug(&plug);
+
sum = page_address(sum_page);
switch (GET_SUM_TYPE((&sum->footer))) {
@@ -633,10 +676,12 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
break;
}
+ blk_finish_plug(&plug);
+
stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
stat_inc_call_count(sbi->stat_info);
- f2fs_put_page(sum_page, 0);
+ f2fs_put_page(sum_page, 1);
}
int f2fs_gc(struct f2fs_sb_info *sbi)
@@ -649,22 +694,33 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&ilist);
gc_more:
- if (!(sbi->sb->s_flags & MS_ACTIVE))
+ if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+ goto stop;
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
goto stop;
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree))
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
gc_type = FG_GC;
+ write_checkpoint(sbi, false);
+ }
if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
goto stop;
ret = 0;
+ /* readahead multi ssa blocks those have contiguous address */
+ if (sbi->segs_per_sec > 1)
+ ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
+ META_SSA);
+
for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &ilist, gc_type);
- if (gc_type == FG_GC &&
- get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
+ if (gc_type == FG_GC) {
+ sbi->cur_victim_sec = NULL_SEGNO;
nfree++;
+ WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
+ }
if (has_not_enough_free_secs(sbi, nfree))
goto gc_more;
@@ -686,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
- sizeof(struct inode_entry), NULL);
+ sizeof(struct inode_entry));
if (!winode_slab)
return -ENOMEM;
return 0;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 30b2db003ac..5d5eb6047bf 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -13,18 +13,26 @@
* whether IO subsystem is idle
* or not
*/
-#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */
-#define GC_THREAD_MAX_SLEEP_TIME 30000
-#define GC_THREAD_NOGC_SLEEP_TIME 10000
+#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
+#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
+#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/* Search max. number of dirty segments to select a victim segment */
-#define MAX_VICTIM_SEARCH 20
+#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
wait_queue_head_t gc_wait_queue_head;
+
+ /* for gc sleep time */
+ unsigned int min_sleep_time;
+ unsigned int max_sleep_time;
+ unsigned int no_gc_sleep_time;
+
+ /* for changing gc mode */
+ unsigned int gc_idle;
};
struct inode_entry {
@@ -56,19 +64,25 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}
-static inline long increase_sleep_time(long wait)
+static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
{
- wait += GC_THREAD_MIN_SLEEP_TIME;
- if (wait > GC_THREAD_MAX_SLEEP_TIME)
- wait = GC_THREAD_MAX_SLEEP_TIME;
+ if (wait == gc_th->no_gc_sleep_time)
+ return wait;
+
+ wait += gc_th->min_sleep_time;
+ if (wait > gc_th->max_sleep_time)
+ wait = gc_th->max_sleep_time;
return wait;
}
-static inline long decrease_sleep_time(long wait)
+static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
{
- wait -= GC_THREAD_MIN_SLEEP_TIME;
- if (wait <= GC_THREAD_MIN_SLEEP_TIME)
- wait = GC_THREAD_MIN_SLEEP_TIME;
+ if (wait == gc_th->no_gc_sleep_time)
+ wait = gc_th->max_sleep_time;
+
+ wait -= gc_th->min_sleep_time;
+ if (wait <= gc_th->min_sleep_time)
+ wait = gc_th->min_sleep_time;
return wait;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
new file mode 100644
index 00000000000..1bba5228c19
--- /dev/null
+++ b/fs/f2fs/inline.c
@@ -0,0 +1,250 @@
+/*
+ * fs/f2fs/inline.c
+ * Copyright (c) 2013, Intel Corporation
+ * Authors: Huajun Li <huajun.li@intel.com>
+ * Haicheng Li <haicheng.li@intel.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+
+bool f2fs_may_inline(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ block_t nr_blocks;
+ loff_t i_size;
+
+ if (!test_opt(sbi, INLINE_DATA))
+ return false;
+
+ nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
+ if (inode->i_blocks > nr_blocks)
+ return false;
+
+ i_size = i_size_read(inode);
+ if (i_size > MAX_INLINE_DATA)
+ return false;
+
+ return true;
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *ipage;
+ void *src_addr, *dst_addr;
+
+ if (page->index) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ goto out;
+ }
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ unlock_page(page);
+ return PTR_ERR(ipage);
+ }
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap(page);
+ f2fs_put_page(ipage, 1);
+
+out:
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ return 0;
+}
+
+static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
+{
+ int err;
+ struct page *ipage;
+ struct dnode_of_data dn;
+ void *src_addr, *dst_addr;
+ block_t new_blk_addr;
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC | REQ_PRIO,
+ };
+
+ f2fs_lock_op(sbi);
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ /*
+ * i_addr[0] is not used for inline data,
+ * so reserving new block will not destroy inline data
+ */
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, 0);
+ if (err)
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap(page);
+ SetPageUptodate(page);
+
+ /* write data page to try to make data consistent */
+ set_page_writeback(page);
+ write_data_page(page, &dn, &new_blk_addr, &fio);
+ update_extent_cache(new_blk_addr, &dn);
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ /* clear inline data and flag after data writeback */
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ stat_dec_inline_inode(inode);
+
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
+int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
+{
+ struct page *page;
+ int err;
+
+ if (!f2fs_has_inline_data(inode))
+ return 0;
+ else if (to_size <= MAX_INLINE_DATA)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ err = __f2fs_convert_inline_data(inode, page);
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_write_inline_data(struct inode *inode,
+ struct page *page, unsigned size)
+{
+ void *src_addr, *dst_addr;
+ struct page *ipage;
+ struct dnode_of_data dn;
+ int err;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ if (err)
+ return err;
+ ipage = dn.inode_page;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ src_addr = kmap(page);
+ dst_addr = inline_data_addr(ipage);
+ memcpy(dst_addr, src_addr, size);
+ kunmap(page);
+
+ /* Release the first data block if it is allocated */
+ if (!f2fs_has_inline_data(inode)) {
+ truncate_data_blocks_range(&dn, 1);
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ stat_inc_inline_inode(inode);
+ }
+
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+
+ return 0;
+}
+
+void truncate_inline_data(struct inode *inode, u64 from)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *ipage;
+
+ if (from >= MAX_INLINE_DATA)
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage))
+ return;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ set_page_dirty(ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+int recover_inline_data(struct inode *inode, struct page *npage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_inode *ri = NULL;
+ void *src_addr, *dst_addr;
+ struct page *ipage;
+
+ /*
+ * The inline_data recovery policy is as follows.
+ * [prev.] [next] of inline_data flag
+ * o o -> recover inline_data
+ * o x -> remove inline_data, and then recover data blocks
+ * x o -> remove inline_data, and then recover inline_data
+ * x x -> recover data blocks
+ */
+ if (IS_INODE(npage))
+ ri = F2FS_INODE(npage);
+
+ if (f2fs_has_inline_data(inode) &&
+ ri && ri->i_inline & F2FS_INLINE_DATA) {
+process_inline:
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ src_addr = inline_data_addr(npage);
+ dst_addr = inline_data_addr(ipage);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ return -1;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ zero_user_segment(ipage, INLINE_DATA_OFFSET,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ } else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
+ truncate_blocks(inode, 0);
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ goto process_inline;
+ }
+ return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ddae412d30c..2cf6962f6cc 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,27 +12,59 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bitops.h>
#include "f2fs.h"
#include "node.h"
+#include <trace/events/f2fs.h>
+
void f2fs_set_inode_flags(struct inode *inode)
{
unsigned int flags = F2FS_I(inode)->i_flags;
-
- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
- S_NOATIME | S_DIRSYNC);
+ unsigned int new_fl = 0;
if (flags & FS_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
+}
+
+static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ if (ri->i_addr[0])
+ inode->i_rdev =
+ old_decode_dev(le32_to_cpu(ri->i_addr[0]));
+ else
+ inode->i_rdev =
+ new_decode_dev(le32_to_cpu(ri->i_addr[1]));
+ }
+}
+
+static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ if (old_valid_dev(inode->i_rdev)) {
+ ri->i_addr[0] =
+ cpu_to_le32(old_encode_dev(inode->i_rdev));
+ ri->i_addr[1] = 0;
+ } else {
+ ri->i_addr[0] = 0;
+ ri->i_addr[1] =
+ cpu_to_le32(new_encode_dev(inode->i_rdev));
+ ri->i_addr[2] = 0;
+ }
+ }
}
static int do_read_inode(struct inode *inode)
@@ -40,18 +72,21 @@ static int do_read_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
- struct f2fs_node *rn;
struct f2fs_inode *ri;
/* Check if ino is within scope */
- check_nid_range(sbi, inode->i_ino);
+ if (check_nid_range(sbi, inode->i_ino)) {
+ f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
+ (unsigned long) inode->i_ino);
+ WARN_ON(1);
+ return -EINVAL;
+ }
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
- rn = page_address(node_page);
- ri = &(rn->i);
+ ri = F2FS_INODE(node_page);
inode->i_mode = le16_to_cpu(ri->i_mode);
i_uid_write(inode, le32_to_cpu(ri->i_uid));
@@ -67,19 +102,21 @@ static int do_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
- if (ri->i_addr[0])
- inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
- else
- inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
fi->flags = 0;
- fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
+ fi->i_dir_level = ri->i_dir_level;
+
get_extent_info(&fi->ext, ri->i_ext);
+ get_inline_info(fi, ri);
+
+ /* get rdev by using inline_info */
+ __get_inode_rdev(inode, ri);
+
f2fs_put_page(node_page, 1);
return 0;
}
@@ -88,25 +125,22 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- int ret;
+ int ret = 0;
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+
+ if (!(inode->i_state & I_NEW)) {
+ trace_f2fs_iget(inode);
return inode;
+ }
if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
goto make_now;
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
-
- if (!sbi->por_doing && inode->i_nlink == 0) {
- ret = -ENOENT;
- goto bad_inode;
- }
-
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
@@ -122,8 +156,7 @@ make_now:
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE |
- __GFP_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -136,23 +169,22 @@ make_now:
goto bad_inode;
}
unlock_new_inode(inode);
-
+ trace_f2fs_iget(inode);
return inode;
bad_inode:
iget_failed(inode);
+ trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
}
void update_inode(struct inode *inode, struct page *node_page)
{
- struct f2fs_node *rn;
struct f2fs_inode *ri;
- wait_on_page_writeback(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE);
- rn = page_address(node_page);
- ri = &(rn->i);
+ ri = F2FS_INODE(node_page);
ri->i_mode = cpu_to_le16(inode->i_mode);
ri->i_advise = F2FS_I(inode)->i_advise;
@@ -162,6 +194,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
+ set_raw_inline(F2FS_I(inode), ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
@@ -174,55 +207,57 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
+ ri->i_dir_level = F2FS_I(inode)->i_dir_level;
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- if (old_valid_dev(inode->i_rdev)) {
- ri->i_addr[0] =
- cpu_to_le32(old_encode_dev(inode->i_rdev));
- ri->i_addr[1] = 0;
- } else {
- ri->i_addr[0] = 0;
- ri->i_addr[1] =
- cpu_to_le32(new_encode_dev(inode->i_rdev));
- ri->i_addr[2] = 0;
- }
- }
-
+ __set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
set_page_dirty(node_page);
+
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
}
-int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
+void update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *node_page;
- bool need_lock = false;
+retry:
+ node_page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(node_page)) {
+ int err = PTR_ERR(node_page);
+ if (err == -ENOMEM) {
+ cond_resched();
+ goto retry;
+ } else if (err != -ENOENT) {
+ f2fs_stop_checkpoint(sbi);
+ }
+ return;
+ }
+ update_inode(inode, node_page);
+ f2fs_put_page(node_page, 1);
+}
+
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
return 0;
+ if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE))
+ return 0;
+
+ /*
+ * We need to lock here to prevent from producing dirty node pages
+ * during the urgent cleaning time when runing out of free sections.
+ */
+ f2fs_lock_op(sbi);
+ update_inode_page(inode);
+ f2fs_unlock_op(sbi);
+
if (wbc)
f2fs_balance_fs(sbi);
- node_page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(node_page))
- return PTR_ERR(node_page);
-
- if (!PageDirty(node_page)) {
- need_lock = true;
- f2fs_put_page(node_page, 1);
- mutex_lock(&sbi->write_inode);
- node_page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(node_page)) {
- mutex_unlock(&sbi->write_inode);
- return PTR_ERR(node_page);
- }
- }
- update_inode(inode, node_page);
- f2fs_put_page(node_page, 1);
- if (need_lock)
- mutex_unlock(&sbi->write_inode);
return 0;
}
@@ -233,13 +268,14 @@ void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- truncate_inode_pages(&inode->i_data, 0);
+ trace_f2fs_evict_inode(inode);
+ truncate_inode_pages_final(&inode->i_data);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
goto no_delete;
- BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
+ f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
if (inode->i_nlink || is_bad_inode(inode))
@@ -252,8 +288,13 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
+ f2fs_lock_op(sbi);
remove_inode_page(inode);
+ stat_dec_inline_inode(inode);
+ f2fs_unlock_op(sbi);
+
sb_end_intwrite(inode->i_sb);
no_delete:
clear_inode(inode);
+ invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1a49b881bac..a6bdddc33ce 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -15,8 +15,10 @@
#include <linux/ctype.h>
#include "f2fs.h"
+#include "node.h"
#include "xattr.h"
#include "acl.h"
+#include <trace/events/f2fs.h>
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
@@ -31,26 +33,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (!inode)
return ERR_PTR(-ENOMEM);
- mutex_lock_op(sbi, NODE_NEW);
+ f2fs_lock_op(sbi);
if (!alloc_nid(sbi, &ino)) {
- mutex_unlock_op(sbi, NODE_NEW);
+ f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
}
- mutex_unlock_op(sbi, NODE_NEW);
+ f2fs_unlock_op(sbi);
- inode->i_uid = current_fsuid();
-
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else {
- inode->i_gid = current_fsgid();
- }
+ inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
- inode->i_mode = mode;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = sbi->s_next_generation++;
@@ -61,7 +54,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
goto out;
}
-
+ trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;
@@ -69,6 +62,8 @@ out:
clear_nlink(inode);
unlock_new_inode(inode);
fail:
+ trace_f2fs_new_inode(inode, err);
+ make_bad_inode(inode);
iput(inode);
if (nid_free)
alloc_nid_failed(sbi, ino);
@@ -79,27 +74,17 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
- int ret;
if (sublen > slen)
- return 1;
-
- ret = memcmp(s + slen - sublen, sub, sublen);
- if (ret) { /* compare upper case */
- int i;
- char upper_sub[8];
- for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
- upper_sub[i] = toupper(sub[i]);
- return memcmp(s + slen - sublen, upper_sub, sublen);
- }
+ return 0;
- return ret;
+ return !strncasecmp(s + slen - sublen, sub, sublen);
}
/*
* Set multimedia files as cold files for hot/cold data separation
*/
-static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
+static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
{
int i;
@@ -107,8 +92,8 @@ static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
int count = le32_to_cpu(sbi->raw_super->extension_count);
for (i = 0; i < count; i++) {
- if (!is_multimedia_file(name, extlist[i])) {
- F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
+ if (is_multimedia_file(name, extlist[i])) {
+ file_set_cold(inode);
break;
}
}
@@ -130,26 +115,28 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return PTR_ERR(inode);
if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
- set_cold_file(sbi, inode, dentry->d_name.name);
+ set_cold_files(sbi, inode, dentry->d_name.name);
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
alloc_nid_done(sbi, ino);
- if (!sbi->por_doing)
- d_instantiate(dentry, inode);
+ d_instantiate(dentry, inode);
unlock_new_inode(inode);
return 0;
out:
clear_nlink(inode);
unlock_new_inode(inode);
+ make_bad_inode(inode);
iput(inode);
alloc_nid_failed(sbi, ino);
return err;
@@ -166,10 +153,12 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
f2fs_balance_fs(sbi);
inode->i_ctime = CURRENT_TIME;
- atomic_inc(&inode->i_count);
+ ihold(inode);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -197,7 +186,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct f2fs_dir_entry *de;
struct page *page;
- if (dentry->d_name.len > F2FS_MAX_NAME_LEN)
+ if (dentry->d_name.len > F2FS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
@@ -209,6 +198,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
+
+ stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
@@ -223,24 +214,28 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct page *page;
int err = -ENOENT;
+ trace_f2fs_unlink_enter(dir, dentry);
f2fs_balance_fs(sbi);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto fail;
- err = check_orphan_space(sbi);
+ f2fs_lock_op(sbi);
+ err = acquire_orphan_inode(sbi);
if (err) {
+ f2fs_unlock_op(sbi);
kunmap(page);
f2fs_put_page(page, 0);
goto fail;
}
-
f2fs_delete_entry(de, page, inode);
+ f2fs_unlock_op(sbi);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
fail:
+ trace_f2fs_unlink_exit(inode, err);
return err;
}
@@ -262,7 +257,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -275,6 +272,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
out:
clear_nlink(inode);
unlock_new_inode(inode);
+ make_bad_inode(inode);
iput(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
@@ -298,7 +296,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
+ f2fs_unlock_op(sbi);
if (err)
goto out_fail;
@@ -313,6 +313,7 @@ out_fail:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
clear_nlink(inode);
unlock_new_inode(inode);
+ make_bad_inode(inode);
iput(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
@@ -346,7 +347,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
+ f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
+ f2fs_unlock_op(sbi);
if (err)
goto out;
@@ -357,6 +360,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
out:
clear_nlink(inode);
unlock_new_inode(inode);
+ make_bad_inode(inode);
iput(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
@@ -370,7 +374,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *old_dir_page;
- struct page *old_page;
+ struct page *old_page, *new_page;
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
@@ -389,10 +393,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
- mutex_lock_op(sbi, RENAME);
+ f2fs_lock_op(sbi);
if (new_inode) {
- struct page *new_page;
err = -ENOTEMPTY;
if (old_dir_entry && !f2fs_empty_dir(new_inode))
@@ -404,15 +407,33 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_dir;
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto put_out_dir;
+
+ if (update_dent_inode(old_inode, &new_dentry->d_name)) {
+ release_orphan_inode(sbi);
+ goto put_out_dir;
+ }
+
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
+ down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
+ mark_inode_dirty(new_inode);
+
if (!new_inode->i_nlink)
add_orphan_inode(sbi, new_inode->i_ino);
- f2fs_write_inode(new_inode, NULL);
+ else
+ release_orphan_inode(sbi);
+
+ update_inode_page(old_inode);
+ update_inode_page(new_inode);
} else {
err = f2fs_add_link(new_dentry, old_inode);
if (err)
@@ -420,12 +441,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir_entry) {
inc_nlink(new_dir);
- f2fs_write_inode(new_dir, NULL);
+ update_inode_page(new_dir);
}
}
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
old_inode->i_ctime = CURRENT_TIME;
- set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
mark_inode_dirty(old_inode);
f2fs_delete_entry(old_entry, old_page, NULL);
@@ -434,23 +458,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
+ update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
- f2fs_write_inode(old_dir, NULL);
+ mark_inode_dirty(old_dir);
+ update_inode_page(old_dir);
}
- mutex_unlock_op(sbi, RENAME);
+ f2fs_unlock_op(sbi);
return 0;
+put_out_dir:
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
- mutex_unlock_op(sbi, RENAME);
+ f2fs_unlock_op(sbi);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
@@ -468,8 +497,10 @@ const struct inode_operations f2fs_dir_inode_operations = {
.rmdir = f2fs_rmdir,
.mknod = f2fs_mknod,
.rename = f2fs_rename,
+ .getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -482,6 +513,7 @@ const struct inode_operations f2fs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
+ .getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
@@ -492,8 +524,10 @@ const struct inode_operations f2fs_symlink_inode_operations = {
};
const struct inode_operations f2fs_special_inode_operations = {
+ .getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
+ .set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e275218904e..4b697ccc9b0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -19,10 +19,37 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include <trace/events/f2fs.h>
+
+#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct sysinfo val;
+ unsigned long mem_size = 0;
+ bool res = false;
+
+ si_meminfo(&val);
+ /* give 25%, 25%, 50% memory for each components respectively */
+ if (type == FREE_NIDS) {
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == NAT_ENTRIES) {
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == DIRTY_DENTS) {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ }
+ return res;
+}
+
static void clear_node_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -81,33 +108,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
-/*
- * Readahead NAT pages
- */
-static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
-{
- struct address_space *mapping = sbi->meta_inode->i_mapping;
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct page *page;
- pgoff_t index;
- int i;
-
- for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
- if (nid >= nm_i->max_nid)
- nid = 0;
- index = current_nat_addr(sbi, nid);
-
- page = grab_cache_page(mapping, index);
- if (!page)
- continue;
- if (f2fs_readpage(sbi, page, index, READ)) {
- f2fs_put_page(page, 1);
- continue;
- }
- f2fs_put_page(page, 0);
- }
-}
-
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
@@ -141,6 +141,32 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
+bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool fsync_done = false;
+
+ read_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ fsync_done = e->fsync_done;
+ read_unlock(&nm_i->nat_tree_lock);
+ return fsync_done;
+}
+
+void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+
+ write_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ e->fsync_done = false;
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
@@ -154,6 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
}
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
+ new->checkpointed = true;
list_add_tail(&new->list, &nm_i->nat_entries);
nm_i->nat_cnt++;
return new;
@@ -172,16 +199,13 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
- nat_set_ino(e, le32_to_cpu(ne->ino));
- nat_set_version(e, ne->version);
- e->checkpointed = true;
+ node_info_from_raw_nat(&e->ni, ne);
}
write_unlock(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
- block_t new_blkaddr)
+ block_t new_blkaddr, bool fsync_done)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -195,8 +219,7 @@ retry:
goto retry;
}
e->ni = *ni;
- e->checkpointed = true;
- BUG_ON(ni->blk_addr == NEW_ADDR);
+ f2fs_bug_on(ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
* when nid is reallocated,
@@ -204,19 +227,16 @@ retry:
* So, reinitialize it with new information.
*/
e->ni = *ni;
- BUG_ON(ni->blk_addr != NULL_ADDR);
+ f2fs_bug_on(ni->blk_addr != NULL_ADDR);
}
- if (new_blkaddr == NEW_ADDR)
- e->checkpointed = false;
-
/* sanity check */
- BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
- BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
+ f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
new_blkaddr == NULL_ADDR);
- BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
+ f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
nat_get_blkaddr(e) != NULL_ADDR &&
new_blkaddr == NEW_ADDR);
@@ -229,14 +249,19 @@ retry:
/* change address */
nat_set_blkaddr(e, new_blkaddr);
__set_nat_cache_dirty(nm_i, e);
+
+ /* update fsync_mark if its inode nat entry is still alive */
+ e = __lookup_nat_cache(nm_i, ni->ino);
+ if (e)
+ e->fsync_done = fsync_done;
write_unlock(&nm_i->nat_tree_lock);
}
-static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
+ if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -307,9 +332,10 @@ cache:
* The maximum depth is four.
* Offset[0] will have raw inode offset.
*/
-static int get_node_path(long block, int offset[4], unsigned int noffset[4])
+static int get_node_path(struct f2fs_inode_info *fi, long block,
+ int offset[4], unsigned int noffset[4])
{
- const long direct_index = ADDRS_PER_INODE;
+ const long direct_index = ADDRS_PER_INODE(fi);
const long direct_blks = ADDRS_PER_BLOCK;
const long dptrs_per_blk = NIDS_PER_BLOCK;
const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -320,15 +346,14 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
noffset[0] = 0;
if (block < direct_index) {
- offset[n++] = block;
- level = 0;
+ offset[n] = block;
goto got;
}
block -= direct_index;
if (block < direct_blks) {
offset[n++] = NODE_DIR1_BLOCK;
noffset[n] = 1;
- offset[n++] = block;
+ offset[n] = block;
level = 1;
goto got;
}
@@ -336,7 +361,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
if (block < direct_blks) {
offset[n++] = NODE_DIR2_BLOCK;
noffset[n] = 2;
- offset[n++] = block;
+ offset[n] = block;
level = 1;
goto got;
}
@@ -346,7 +371,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
noffset[n] = 3;
offset[n++] = block / direct_blks;
noffset[n] = 4 + offset[n - 1];
- offset[n++] = block % direct_blks;
+ offset[n] = block % direct_blks;
level = 2;
goto got;
}
@@ -356,7 +381,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
noffset[n] = 4 + dptrs_per_blk;
offset[n++] = block / direct_blks;
noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
- offset[n++] = block % direct_blks;
+ offset[n] = block % direct_blks;
level = 2;
goto got;
}
@@ -371,7 +396,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
noffset[n] = 7 + (dptrs_per_blk * 2) +
offset[n - 2] * (dptrs_per_blk + 1) +
offset[n - 1];
- offset[n++] = block % direct_blks;
+ offset[n] = block % direct_blks;
level = 3;
goto got;
} else {
@@ -383,8 +408,11 @@ got:
/*
* Caller should call f2fs_put_dnode(dn).
+ * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
+ * In the case of RDONLY_NODE, we don't need to care about mutex.
*/
-int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct page *npage[4];
@@ -395,15 +423,19 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
int level, i;
int err = 0;
- level = get_node_path(index, offset, noffset);
+ level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
nids[0] = dn->inode->i_ino;
- npage[0] = get_node_page(sbi, nids[0]);
- if (IS_ERR(npage[0]))
- return PTR_ERR(npage[0]);
+ npage[0] = dn->inode_page;
+ if (!npage[0]) {
+ npage[0] = get_node_page(sbi, nids[0]);
+ if (IS_ERR(npage[0]))
+ return PTR_ERR(npage[0]);
+ }
parent = npage[0];
- nids[1] = get_nid(parent, offset[0], true);
+ if (level != 0)
+ nids[1] = get_nid(parent, offset[0], true);
dn->inode_page = npage[0];
dn->inode_page_locked = true;
@@ -411,30 +443,25 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
for (i = 1; i <= level; i++) {
bool done = false;
- if (!nids[i] && !ro) {
- mutex_lock_op(sbi, NODE_NEW);
-
+ if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
if (!alloc_nid(sbi, &(nids[i]))) {
- mutex_unlock_op(sbi, NODE_NEW);
err = -ENOSPC;
goto release_pages;
}
dn->nid = nids[i];
- npage[i] = new_node_page(dn, noffset[i]);
+ npage[i] = new_node_page(dn, noffset[i], NULL);
if (IS_ERR(npage[i])) {
alloc_nid_failed(sbi, nids[i]);
- mutex_unlock_op(sbi, NODE_NEW);
err = PTR_ERR(npage[i]);
goto release_pages;
}
set_nid(parent, offset[i - 1], nids[i], i == 1);
alloc_nid_done(sbi, nids[i]);
- mutex_unlock_op(sbi, NODE_NEW);
done = true;
- } else if (ro && i == level && level > 1) {
+ } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
npage[i] = get_node_page_ra(parent, offset[i - 1]);
if (IS_ERR(npage[i])) {
err = PTR_ERR(npage[i]);
@@ -485,15 +512,15 @@ static void truncate_node(struct dnode_of_data *dn)
get_node_info(sbi, dn->nid, &ni);
if (dn->inode->i_blocks == 0) {
- BUG_ON(ni.blk_addr != NULL_ADDR);
+ f2fs_bug_on(ni.blk_addr != NULL_ADDR);
goto invalidate;
}
- BUG_ON(ni.blk_addr == NULL_ADDR);
+ f2fs_bug_on(ni.blk_addr == NULL_ADDR);
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
- dec_valid_node_count(sbi, dn->inode, 1);
- set_node_addr(sbi, &ni, NULL_ADDR);
+ dec_valid_node_count(sbi, dn->inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
@@ -506,7 +533,12 @@ invalidate:
F2FS_SET_SB_DIRT(sbi);
f2fs_put_page(dn->node_page, 1);
+
+ invalidate_mapping_pages(NODE_MAPPING(sbi),
+ dn->node_page->index, dn->node_page->index);
+
dn->node_page = NULL;
+ trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}
static int truncate_dnode(struct dnode_of_data *dn)
@@ -547,11 +579,15 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (dn->nid == 0)
return NIDS_PER_BLOCK + 1;
+ trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
+
page = get_node_page(sbi, dn->nid);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
return PTR_ERR(page);
+ }
- rn = (struct f2fs_node *)page_address(page);
+ rn = F2FS_NODE(page);
if (depth < 3) {
for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
child_nid = le32_to_cpu(rn->in.nid[i]);
@@ -591,10 +627,12 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
} else {
f2fs_put_page(page, 1);
}
+ trace_f2fs_truncate_nodes_exit(dn->inode, freed);
return freed;
out_err:
f2fs_put_page(page, 1);
+ trace_f2fs_truncate_nodes_exit(dn->inode, ret);
return ret;
}
@@ -614,19 +652,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
return 0;
/* get indirect nodes in the path */
- for (i = 0; i < depth - 1; i++) {
+ for (i = 0; i < idx + 1; i++) {
/* refernece count'll be increased */
pages[i] = get_node_page(sbi, nid[i]);
if (IS_ERR(pages[i])) {
- depth = i + 1;
err = PTR_ERR(pages[i]);
+ idx = i - 1;
goto fail;
}
nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
}
/* free direct nodes linked to a partial indirect node */
- for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
+ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
child_nid = get_nid(pages[idx], i, false);
if (!child_nid)
continue;
@@ -637,7 +675,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
set_nid(pages[idx], i, 0, false);
}
- if (offset[depth - 1] == 0) {
+ if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
truncate_node(dn);
@@ -645,10 +683,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
f2fs_put_page(pages[idx], 1);
}
offset[idx]++;
- offset[depth - 1] = 0;
+ offset[idx + 1] = 0;
+ idx--;
fail:
- for (i = depth - 3; i >= 0; i--)
+ for (i = idx; i >= 0; i--)
f2fs_put_page(pages[i], 1);
+
+ trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
+
return err;
}
@@ -661,20 +703,24 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
int err = 0, cont = 1;
int level, offset[4], noffset[4];
unsigned int nofs = 0;
- struct f2fs_node *rn;
+ struct f2fs_inode *ri;
struct dnode_of_data dn;
struct page *page;
- level = get_node_path(from, offset, noffset);
+ trace_f2fs_truncate_inode_blocks_enter(inode, from);
+ level = get_node_path(F2FS_I(inode), from, offset, noffset);
+restart:
page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
return PTR_ERR(page);
+ }
set_new_dnode(&dn, inode, page, NULL, 0);
unlock_page(page);
- rn = page_address(page);
+ ri = F2FS_INODE(page);
switch (level) {
case 0:
case 1:
@@ -684,7 +730,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = noffset[1];
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+ err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
nofs += 1 + NIDS_PER_BLOCK;
@@ -693,7 +739,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = 5 + 2 * NIDS_PER_BLOCK;
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+ err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
break;
@@ -703,7 +749,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
skip_partial:
while (cont) {
- dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
+ dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
switch (offset[0]) {
case NODE_DIR1_BLOCK:
case NODE_DIR2_BLOCK:
@@ -726,10 +772,14 @@ skip_partial:
if (err < 0 && err != -ENOENT)
goto fail;
if (offset[1] == 0 &&
- rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
- wait_on_page_writeback(page);
- rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+ f2fs_put_page(page, 1);
+ goto restart;
+ }
+ f2fs_wait_on_page_writeback(page, NODE);
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
}
@@ -739,100 +789,115 @@ skip_partial:
}
fail:
f2fs_put_page(page, 0);
+ trace_f2fs_truncate_inode_blocks_exit(inode, err);
return err > 0 ? 0 : err;
}
-int remove_inode_page(struct inode *inode)
+int truncate_xattr_node(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct page *page;
- nid_t ino = inode->i_ino;
+ nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
+ struct page *npage;
- mutex_lock_op(sbi, NODE_TRUNC);
- page = get_node_page(sbi, ino);
- if (IS_ERR(page)) {
- mutex_unlock_op(sbi, NODE_TRUNC);
- return PTR_ERR(page);
- }
+ if (!nid)
+ return 0;
- if (F2FS_I(inode)->i_xattr_nid) {
- nid_t nid = F2FS_I(inode)->i_xattr_nid;
- struct page *npage = get_node_page(sbi, nid);
+ npage = get_node_page(sbi, nid);
+ if (IS_ERR(npage))
+ return PTR_ERR(npage);
- if (IS_ERR(npage)) {
- mutex_unlock_op(sbi, NODE_TRUNC);
- return PTR_ERR(npage);
- }
+ F2FS_I(inode)->i_xattr_nid = 0;
- F2FS_I(inode)->i_xattr_nid = 0;
- set_new_dnode(&dn, inode, page, npage, nid);
- dn.inode_page_locked = 1;
- truncate_node(&dn);
- }
+ /* need to do checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
- /* 0 is possible, after f2fs_new_inode() is failed */
- BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
- set_new_dnode(&dn, inode, page, page, ino);
- truncate_node(&dn);
+ set_new_dnode(&dn, inode, page, npage, nid);
- mutex_unlock_op(sbi, NODE_TRUNC);
+ if (page)
+ dn.inode_page_locked = true;
+ truncate_node(&dn);
return 0;
}
-int new_inode_page(struct inode *inode, const struct qstr *name)
+/*
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ */
+void remove_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *page;
+ nid_t ino = inode->i_ino;
+ struct dnode_of_data dn;
+
+ page = get_node_page(sbi, ino);
+ if (IS_ERR(page))
+ return;
+
+ if (truncate_xattr_node(inode, page)) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ /* 0 is possible, after f2fs_new_inode() is failed */
+ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
+ set_new_dnode(&dn, inode, page, page, ino);
+ truncate_node(&dn);
+}
+
+struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+{
struct dnode_of_data dn;
/* allocate inode page for new inode */
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
- mutex_lock_op(sbi, NODE_NEW);
- page = new_node_page(&dn, 0);
- init_dent_inode(name, page);
- mutex_unlock_op(sbi, NODE_NEW);
- if (IS_ERR(page))
- return PTR_ERR(page);
- f2fs_put_page(page, 1);
- return 0;
+
+ /* caller should f2fs_put_page(page, 1); */
+ return new_node_page(&dn, 0, NULL);
}
-struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
+struct page *new_node_page(struct dnode_of_data *dn,
+ unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct node_info old_ni, new_ni;
struct page *page;
int err;
- if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
- page = grab_cache_page(mapping, dn->nid);
+ page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
if (!page)
return ERR_PTR(-ENOMEM);
- get_node_info(sbi, dn->nid, &old_ni);
+ if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
+ err = -ENOSPC;
+ goto fail;
+ }
- SetPageUptodate(page);
- fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+ get_node_info(sbi, dn->nid, &old_ni);
/* Reinitialize old_ni with new node page */
- BUG_ON(old_ni.blk_addr != NULL_ADDR);
+ f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- if (!inc_valid_node_count(sbi, dn->inode, 1)) {
- err = -ENOSPC;
- goto fail;
- }
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ f2fs_wait_on_page_writeback(page, NODE);
+ fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ if (f2fs_has_xattr_block(ofs))
+ F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
dn->node_page = page;
- sync_inode_page(dn);
- set_page_dirty(page);
+ if (ipage)
+ update_inode(dn->inode, ipage);
+ else
+ sync_inode_page(dn);
if (ofs == 0)
inc_valid_inode_count(sbi);
@@ -844,16 +909,28 @@ fail:
return ERR_PTR(err);
}
-static int read_node_page(struct page *page, int type)
+/*
+ * Caller should do after getting the following values.
+ * 0: f2fs_put_page(page, 0)
+ * LOCKED_PAGE: f2fs_put_page(page, 1)
+ * error: nothing
+ */
+static int read_node_page(struct page *page, int rw)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
struct node_info ni;
get_node_info(sbi, page->index, &ni);
- if (ni.blk_addr == NULL_ADDR)
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ f2fs_put_page(page, 1);
return -ENOENT;
- return f2fs_readpage(sbi, page, ni.blk_addr, type);
+ }
+
+ if (PageUptodate(page))
+ return LOCKED_PAGE;
+
+ return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
}
/*
@@ -861,44 +938,52 @@ static int read_node_page(struct page *page, int type)
*/
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *apage;
+ int err;
- apage = find_get_page(mapping, nid);
- if (apage && PageUptodate(apage))
- goto release_out;
+ apage = find_get_page(NODE_MAPPING(sbi), nid);
+ if (apage && PageUptodate(apage)) {
+ f2fs_put_page(apage, 0);
+ return;
+ }
f2fs_put_page(apage, 0);
- apage = grab_cache_page(mapping, nid);
+ apage = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!apage)
return;
- if (read_node_page(apage, READA))
- unlock_page(apage);
-
-release_out:
- f2fs_put_page(apage, 0);
- return;
+ err = read_node_page(apage, READA);
+ if (err == 0)
+ f2fs_put_page(apage, 0);
+ else if (err == LOCKED_PAGE)
+ f2fs_put_page(apage, 1);
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
- int err;
struct page *page;
- struct address_space *mapping = sbi->node_inode->i_mapping;
-
- page = grab_cache_page(mapping, nid);
+ int err;
+repeat:
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
err = read_node_page(page, READ_SYNC);
- if (err) {
- f2fs_put_page(page, 1);
+ if (err < 0)
return ERR_PTR(err);
- }
+ else if (err == LOCKED_PAGE)
+ goto got_it;
- BUG_ON(nid != nid_of_node(page));
- mark_page_accessed(page);
+ lock_page(page);
+ if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+got_it:
return page;
}
@@ -909,32 +994,27 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
struct page *get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
- struct address_space *mapping = sbi->node_inode->i_mapping;
- int i, end;
- int err = 0;
- nid_t nid;
+ struct blk_plug plug;
struct page *page;
+ int err, i, end;
+ nid_t nid;
/* First, try getting the desired direct node. */
nid = get_nid(parent, start, false);
if (!nid)
return ERR_PTR(-ENOENT);
-
- page = find_get_page(mapping, nid);
- if (page && PageUptodate(page))
- goto page_hit;
- f2fs_put_page(page, 0);
-
repeat:
- page = grab_cache_page(mapping, nid);
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
- err = read_node_page(page, READA);
- if (err) {
- f2fs_put_page(page, 1);
+ err = read_node_page(page, READ_SYNC);
+ if (err < 0)
return ERR_PTR(err);
- }
+ else if (err == LOCKED_PAGE)
+ goto page_hit;
+
+ blk_start_plug(&plug);
/* Then, try readahead for siblings of the desired node */
end = start + MAX_RA_NODE;
@@ -946,17 +1026,17 @@ repeat:
ra_node_page(sbi, nid);
}
-page_hit:
+ blk_finish_plug(&plug);
+
lock_page(page);
- if (PageError(page)) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
+ goto repeat;
}
-
- /* Has the page been truncated? */
- if (page->mapping != mapping) {
+page_hit:
+ if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(-EIO);
}
return page;
}
@@ -972,14 +1052,13 @@ void sync_inode_page(struct dnode_of_data *dn)
if (!dn->inode_page_locked)
unlock_page(dn->inode_page);
} else {
- f2fs_write_inode(dn->inode, NULL);
+ update_inode_page(dn->inode);
}
}
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct writeback_control *wbc)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
pgoff_t index, end;
struct pagevec pvec;
int step = ino ? 2 : 0;
@@ -993,7 +1072,7 @@ next_step:
while (index <= end) {
int i, nr_pages;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
@@ -1026,7 +1105,7 @@ next_step:
else if (!trylock_page(page))
continue;
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
unlock_page(page);
continue;
@@ -1053,7 +1132,7 @@ continue_unlock:
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
}
- mapping->a_ops->writepage(page, wbc);
+ NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
wrote++;
if (--wbc->nr_to_write == 0)
@@ -1074,11 +1153,52 @@ continue_unlock:
}
if (wrote)
- f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
-
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
return nwritten;
}
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ pgoff_t index = 0, end = LONG_MAX;
+ struct pagevec pvec;
+ int ret2 = 0, ret = 0;
+
+ pagevec_init(&pvec, 0);
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_WRITEBACK,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /* until radix tree lookup accepts end_index */
+ if (unlikely(page->index > end))
+ continue;
+
+ if (ino && ino_of_node(page) == ino) {
+ f2fs_wait_on_page_writeback(page, NODE);
+ if (TestClearPageError(page))
+ ret = -EIO;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+
+ if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -ENOSPC;
+ if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -EIO;
+ if (!ret)
+ ret = ret2;
+ return ret;
+}
+
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
@@ -1086,68 +1206,71 @@ static int f2fs_write_node_page(struct page *page,
nid_t nid;
block_t new_addr;
struct node_info ni;
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
- if (wbc->for_reclaim) {
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
+ trace_f2fs_writepage(page, NODE);
- wait_on_page_writeback(page);
+ if (unlikely(sbi->por_doing))
+ goto redirty_out;
- mutex_lock_op(sbi, NODE_WRITE);
+ f2fs_wait_on_page_writeback(page, NODE);
/* get old block addr of this node page */
nid = nid_of_node(page);
- BUG_ON(page->index != nid);
+ f2fs_bug_on(page->index != nid);
get_node_info(sbi, nid, &ni);
/* This page is already truncated */
- if (ni.blk_addr == NULL_ADDR)
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ unlock_page(page);
return 0;
+ }
- set_page_writeback(page);
+ if (wbc->for_reclaim)
+ goto redirty_out;
- /* insert node offset */
- write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
- set_node_addr(sbi, &ni, new_addr);
+ mutex_lock(&sbi->node_write);
+ set_page_writeback(page);
+ write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
+ set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
-
- mutex_unlock_op(sbi, NODE_WRITE);
+ mutex_unlock(&sbi->node_write);
unlock_page(page);
return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
}
-/*
- * It is very important to gather dirty pages and write at once, so that we can
- * submit a big bio without interfering other data writes.
- * Be default, 512 pages (2MB), a segment size, is quite reasonable.
- */
-#define COLLECT_DIRTY_NODES 512
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- struct block_device *bdev = sbi->sb->s_bdev;
- long nr_to_write = wbc->nr_to_write;
+ long diff;
- /* First check balancing cached NAT entries */
- if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
- write_checkpoint(sbi, false);
- return 0;
- }
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
+ /* balancing f2fs's metadata in background */
+ f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
- return 0;
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ goto skip_write;
- /* if mounting is failed, skip writing node pages */
- wbc->nr_to_write = bio_get_nr_vecs(bdev);
+ diff = nr_pages_to_write(sbi, NODE, wbc);
+ wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
- wbc->nr_to_write = nr_to_write -
- (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
return 0;
}
@@ -1156,6 +1279,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+ trace_f2fs_set_page_dirty(page, NODE);
+
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -1166,7 +1291,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
return 0;
}
-static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
+static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -1178,7 +1304,7 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
static int f2fs_release_node_page(struct page *page, gfp_t wait)
{
ClearPagePrivate(page);
- return 0;
+ return 1;
}
/*
@@ -1192,42 +1318,51 @@ const struct address_space_operations f2fs_node_aops = {
.releasepage = f2fs_release_node_page,
};
-static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
+static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
+ nid_t n)
{
- struct list_head *this;
- struct free_nid *i = NULL;
- list_for_each(this, head) {
- i = list_entry(this, struct free_nid, list);
- if (i->nid == n)
- break;
- i = NULL;
- }
- return i;
+ return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static void __del_from_free_nid_list(struct free_nid *i)
+static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
+ struct free_nid *i)
{
list_del(&i->list);
- kmem_cache_free(free_nid_slab, i);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
+ struct nat_entry *ne;
+ bool allocated = false;
+
+ if (!available_free_memory(sbi, FREE_NIDS))
+ return -1;
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+ /* 0 nid should not be used */
+ if (unlikely(nid == 0))
return 0;
-retry:
- i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
- if (!i) {
- cond_resched();
- goto retry;
+
+ if (build) {
+ /* do not add allocated nids */
+ read_lock(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne &&
+ (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
+ allocated = true;
+ read_unlock(&nm_i->nat_tree_lock);
+ if (allocated)
+ return 0;
}
+
+ i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
spin_lock(&nm_i->free_nid_list_lock);
- if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
return 0;
@@ -1241,72 +1376,75 @@ retry:
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct free_nid *i;
+ bool need_free = false;
+
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+ i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
- __del_from_free_nid_list(i);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ need_free = true;
}
spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
-static int scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
- int fcnt = 0;
int i;
- /* 0 nid should not be used */
- if (start_nid == 0)
- ++start_nid;
-
i = start_nid % NAT_ENTRY_PER_BLOCK;
for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
- blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- BUG_ON(blk_addr == NEW_ADDR);
- if (blk_addr == NULL_ADDR)
- fcnt += add_free_nid(nm_i, start_nid);
+
+ if (unlikely(start_nid >= nm_i->max_nid))
+ break;
+
+ blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
+ f2fs_bug_on(blk_addr == NEW_ADDR);
+ if (blk_addr == NULL_ADDR) {
+ if (add_free_nid(sbi, start_nid, true) < 0)
+ break;
+ }
}
- return fcnt;
}
static void build_free_nids(struct f2fs_sb_info *sbi)
{
- struct free_nid *fnid, *next_fnid;
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- nid_t nid = 0;
- bool is_cycled = false;
- int fcnt = 0;
- int i;
+ int i = 0;
+ nid_t nid = nm_i->next_scan_nid;
- nid = nm_i->next_scan_nid;
- nm_i->init_scan_nid = nid;
+ /* Enough entries */
+ if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+ return;
- ra_nat_pages(sbi, nid);
+ /* readahead nat pages to be scanned */
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
- fcnt += scan_nat_page(nm_i, page, nid);
+ scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
-
- if (nid >= nm_i->max_nid) {
+ if (unlikely(nid >= nm_i->max_nid))
nid = 0;
- is_cycled = true;
- }
- if (fcnt > MAX_FREE_NIDS)
- break;
- if (is_cycled && nm_i->init_scan_nid <= nid)
+
+ if (i++ == FREE_NID_PAGES)
break;
}
+ /* go to the next free nat pages to find free nids abundantly */
nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
@@ -1315,22 +1453,11 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
nid = le32_to_cpu(nid_in_journal(sum, i));
if (addr == NULL_ADDR)
- add_free_nid(nm_i, nid);
+ add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
-
- /* remove the free nids from current allocated nids */
- list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
- struct nat_entry *ne;
-
- read_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, fnid->nid);
- if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
- remove_free_nid(nm_i, fnid->nid);
- read_unlock(&nm_i->nat_tree_lock);
- }
}
/*
@@ -1342,43 +1469,33 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
- struct list_head *this;
retry:
- mutex_lock(&nm_i->build_lock);
- if (!nm_i->fcnt) {
- /* scan NAT in order to build free nid list */
- build_free_nids(sbi);
- if (!nm_i->fcnt) {
- mutex_unlock(&nm_i->build_lock);
- return false;
- }
- }
- mutex_unlock(&nm_i->build_lock);
+ if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
+ return false;
- /*
- * We check fcnt again since previous check is racy as
- * we didn't hold free_nid_list_lock. So other thread
- * could consume all of free nids.
- */
spin_lock(&nm_i->free_nid_list_lock);
- if (!nm_i->fcnt) {
- spin_unlock(&nm_i->free_nid_list_lock);
- goto retry;
- }
- BUG_ON(list_empty(&nm_i->free_nid_list));
- list_for_each(this, &nm_i->free_nid_list) {
- i = list_entry(this, struct free_nid, list);
- if (i->state == NID_NEW)
- break;
- }
+ /* We should not use stale free nids created by build_free_nids */
+ if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(list_empty(&nm_i->free_nid_list));
+ list_for_each_entry(i, &nm_i->free_nid_list, list)
+ if (i->state == NID_NEW)
+ break;
- BUG_ON(i->state != NID_NEW);
- *nid = i->nid;
- i->state = NID_ALLOC;
- nm_i->fcnt--;
+ f2fs_bug_on(i->state != NID_NEW);
+ *nid = i->nid;
+ i->state = NID_ALLOC;
+ nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ return true;
+ }
spin_unlock(&nm_i->free_nid_list_lock);
- return true;
+
+ /* Let's scan nat pages and its caches to get free nids */
+ mutex_lock(&nm_i->build_lock);
+ build_free_nids(sbi);
+ mutex_unlock(&nm_i->build_lock);
+ goto retry;
}
/*
@@ -1390,12 +1507,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct free_nid *i;
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
- if (i) {
- BUG_ON(i->state != NID_ALLOC);
- __del_from_free_nid_list(i);
- }
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(!i || i->state != NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
spin_unlock(&nm_i->free_nid_list_lock);
+
+ kmem_cache_free(free_nid_slab, i);
}
/*
@@ -1403,8 +1520,27 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
*/
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
- alloc_nid_done(sbi, nid);
- add_free_nid(NM_I(sbi), nid);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i;
+ bool need_free = false;
+
+ if (!nid)
+ return;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(!i || i->state != NID_ALLOC);
+ if (!available_free_memory(sbi, FREE_NIDS)) {
+ __del_from_free_nid_list(nm_i, i);
+ need_free = true;
+ } else {
+ i->state = NID_NEW;
+ nm_i->fcnt++;
+ }
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1412,88 +1548,200 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
block_t new_blkaddr)
{
rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
- set_node_addr(sbi, ni, new_blkaddr);
+ set_node_addr(sbi, ni, new_blkaddr, false);
clear_node_page_dirty(page);
}
+static void recover_inline_xattr(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ void *src_addr, *dst_addr;
+ size_t inline_size;
+ struct page *ipage;
+ struct f2fs_inode *ri;
+
+ if (!f2fs_has_inline_xattr(inode))
+ return;
+
+ if (!IS_INODE(page))
+ return;
+
+ ri = F2FS_INODE(page);
+ if (!(ri->i_inline & F2FS_INLINE_XATTR))
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+
+ dst_addr = inline_xattr_addr(ipage);
+ src_addr = inline_xattr_addr(page);
+ inline_size = inline_xattr_size(inode);
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ memcpy(dst_addr, src_addr, inline_size);
+
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
+ nid_t new_xnid = nid_of_node(page);
+ struct node_info ni;
+
+ recover_inline_xattr(inode, page);
+
+ if (!f2fs_has_xattr_block(ofs_of_node(page)))
+ return false;
+
+ /* 1: invalidate the previous xattr nid */
+ if (!prev_xnid)
+ goto recover_xnid;
+
+ /* Deallocate node address */
+ get_node_info(sbi, prev_xnid, &ni);
+ f2fs_bug_on(ni.blk_addr == NULL_ADDR);
+ invalidate_blocks(sbi, ni.blk_addr);
+ dec_valid_node_count(sbi, inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
+
+recover_xnid:
+ /* 2: allocate new xattr nid */
+ if (unlikely(!inc_valid_node_count(sbi, inode)))
+ f2fs_bug_on(1);
+
+ remove_free_nid(NM_I(sbi), new_xnid);
+ get_node_info(sbi, new_xnid, &ni);
+ ni.ino = inode->i_ino;
+ set_node_addr(sbi, &ni, NEW_ADDR, false);
+ F2FS_I(inode)->i_xattr_nid = new_xnid;
+
+ /* 3: update xattr blkaddr */
+ refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
+ set_node_addr(sbi, &ni, blkaddr, false);
+
+ update_inode_page(inode);
+ return true;
+}
+
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
- struct address_space *mapping = sbi->node_inode->i_mapping;
- struct f2fs_node *src, *dst;
+ struct f2fs_inode *src, *dst;
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
- ipage = grab_cache_page(mapping, ino);
+ get_node_info(sbi, ino, &old_ni);
+
+ if (unlikely(old_ni.blk_addr != NULL_ADDR))
+ return -EINVAL;
+
+ ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
if (!ipage)
return -ENOMEM;
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
- get_node_info(sbi, ino, &old_ni);
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
- src = (struct f2fs_node *)page_address(page);
- dst = (struct f2fs_node *)page_address(ipage);
+ src = F2FS_INODE(page);
+ dst = F2FS_INODE(ipage);
- memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
- dst->i.i_size = 0;
- dst->i.i_blocks = cpu_to_le64(1);
- dst->i.i_links = cpu_to_le32(1);
- dst->i.i_xattr_nid = 0;
+ memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
+ dst->i_size = 0;
+ dst->i_blocks = cpu_to_le64(1);
+ dst->i_links = cpu_to_le32(1);
+ dst->i_xattr_nid = 0;
new_ni = old_ni;
new_ni.ino = ino;
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ if (unlikely(!inc_valid_node_count(sbi, NULL)))
+ WARN_ON(1);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
-
f2fs_put_page(ipage, 1);
return 0;
}
+/*
+ * ra_sum_pages() merge contiguous pages into one bio and submit.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
+ */
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
+ int start, int nrpages)
+{
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
+ struct address_space *mapping = inode->i_mapping;
+ int i, page_idx = start;
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+ /* alloc page in bd_inode for reading node summary info */
+ pages[i] = grab_cache_page(mapping, page_idx);
+ if (!pages[i])
+ break;
+ f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
+ }
+
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return i;
+}
+
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
- struct page *page;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
- int i, last_offset;
-
- /* alloc temporal page for read node */
- page = alloc_page(GFP_NOFS | __GFP_ZERO);
- if (IS_ERR(page))
- return PTR_ERR(page);
- lock_page(page);
+ int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+ struct page *pages[bio_blocks];
+ int i, idx, last_offset, nrpages, err = 0;
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
- for (i = 0; i < last_offset; i++, sum_entry++) {
- if (f2fs_readpage(sbi, page, addr, READ_SYNC))
- goto out;
+ for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
+ nrpages = min(last_offset - i, bio_blocks);
- rn = (struct f2fs_node *)page_address(page);
- sum_entry->nid = rn->footer.nid;
- sum_entry->version = 0;
- sum_entry->ofs_in_node = 0;
- addr++;
+ /* read ahead node pages */
+ nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
+ if (!nrpages)
+ return -ENOMEM;
- /*
- * In order to read next node page,
- * we must clear PageUptodate flag.
- */
- ClearPageUptodate(page);
+ for (idx = 0; idx < nrpages; idx++) {
+ if (err)
+ goto skip;
+
+ lock_page(pages[idx]);
+ if (unlikely(!PageUptodate(pages[idx]))) {
+ err = -EIO;
+ } else {
+ rn = F2FS_NODE(pages[idx]);
+ sum_entry->nid = rn->footer.nid;
+ sum_entry->version = 0;
+ sum_entry->ofs_in_node = 0;
+ sum_entry++;
+ }
+ unlock_page(pages[idx]);
+skip:
+ page_cache_release(pages[idx]);
+ }
+
+ invalidate_mapping_pages(inode->i_mapping, addr,
+ addr + nrpages);
}
-out:
- unlock_page(page);
- __free_pages(page, 0);
- return 0;
+ return err;
}
static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -1529,9 +1777,7 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
- nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
- nat_set_version(ne, raw_ne.version);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
@@ -1548,7 +1794,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct list_head *cur, *n;
+ struct nat_entry *ne, *cur;
struct page *page = NULL;
struct f2fs_nat_block *nat_blk = NULL;
nid_t start_nid = 0, end_nid = 0;
@@ -1560,18 +1806,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
mutex_lock(&curseg->curseg_mutex);
/* 1) flush dirty nat caches */
- list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
- struct nat_entry *ne;
+ list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
- block_t new_blkaddr;
-
- ne = list_entry(cur, struct nat_entry, list);
- nid = nat_get_nid(ne);
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
+
+ nid = nat_get_nid(ne);
+
if (flushed)
goto to_nat_page;
@@ -1598,14 +1842,10 @@ to_nat_page:
nat_blk = page_address(page);
}
- BUG_ON(!nat_blk);
+ f2fs_bug_on(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
flush_now:
- new_blkaddr = nat_get_blkaddr(ne);
-
- raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
- raw_ne.block_addr = cpu_to_le32(new_blkaddr);
- raw_ne.version = nat_get_version(ne);
+ raw_nat_from_node_info(&raw_ne, &ne->ni);
if (offset < 0) {
nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1614,26 +1854,20 @@ flush_now:
nid_in_journal(sum, offset) = cpu_to_le32(nid);
}
- if (nat_get_blkaddr(ne) == NULL_ADDR) {
+ if (nat_get_blkaddr(ne) == NULL_ADDR &&
+ add_free_nid(sbi, nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
-
- /* We can reuse this freed nid at this point */
- add_free_nid(NM_I(sbi), nid);
} else {
write_lock(&nm_i->nat_tree_lock);
__clear_nat_cache_dirty(nm_i, ne);
- ne->checkpointed = true;
write_unlock(&nm_i->nat_tree_lock);
}
}
if (!flushed)
mutex_unlock(&curseg->curseg_mutex);
f2fs_put_page(page, 1);
-
- /* 2) shrink nat caches if necessary */
- try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1648,10 +1882,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
+ /* not used nids: 0, node, meta, (and root counted as valid node) */
+ nm_i->available_nids = nm_i->max_nid - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
+ nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+ INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1661,19 +1901,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
spin_lock_init(&nm_i->free_nid_list_lock);
rwlock_init(&nm_i->nat_tree_lock);
- nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
- nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
-
- nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
- if (!nm_i->nat_bitmap)
- return -ENOMEM;
+ nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
if (!version_bitmap)
return -EFAULT;
- /* copy version bitmap */
- memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
+ nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
+ GFP_KERNEL);
+ if (!nm_i->nat_bitmap)
+ return -ENOMEM;
return 0;
}
@@ -1707,11 +1944,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy free nid list */
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
- BUG_ON(i->state == NID_ALLOC);
- __del_from_free_nid_list(i);
+ f2fs_bug_on(i->state == NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ kmem_cache_free(free_nid_slab, i);
+ spin_lock(&nm_i->free_nid_list_lock);
}
- BUG_ON(nm_i->fcnt);
+ f2fs_bug_on(nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
@@ -1719,13 +1959,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
- for (idx = 0; idx < found; idx++) {
- struct nat_entry *e = natvec[idx];
- nid = nat_get_nid(e) + 1;
- __del_from_nat_cache(nm_i, e);
- }
+ nid = nat_get_nid(natvec[found - 1]) + 1;
+ for (idx = 0; idx < found; idx++)
+ __del_from_nat_cache(nm_i, natvec[idx]);
}
- BUG_ON(nm_i->nat_cnt);
+ f2fs_bug_on(nm_i->nat_cnt);
write_unlock(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
@@ -1736,12 +1974,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
int __init create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
- sizeof(struct nat_entry), NULL);
+ sizeof(struct nat_entry));
if (!nat_entry_slab)
return -ENOMEM;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
- sizeof(struct free_nid), NULL);
+ sizeof(struct free_nid));
if (!free_nid_slab) {
kmem_cache_destroy(nat_entry_slab);
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index afdb130f782..7281112cd1c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,18 +17,18 @@
/* # of pages to perform readahead before building free nids */
#define FREE_NID_PAGES 4
-/* maximum # of free node ids to produce during build_free_nids */
-#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
-
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
-/* maximum cached nat entries to manage memory footprint */
-#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
+/* control the memory footprint threshold (10MB per 1GB ram) */
+#define DEF_RAM_THRESHOLD 10
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
+/* return value for read_node_page */
+#define LOCKED_PAGE 1
+
/*
* For node information
*/
@@ -42,6 +42,7 @@ struct node_info {
struct nat_entry {
struct list_head list; /* for clean or dirty nat list */
bool checkpointed; /* whether it is checkpointed or not */
+ bool fsync_done; /* whether the latest node has fsync mark */
struct node_info ni; /* in-memory node information */
};
@@ -55,9 +56,15 @@ struct nat_entry {
#define nat_set_version(nat, v) (nat->ni.version = v)
#define __set_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
+ do { \
+ ne->checkpointed = false; \
+ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
+ } while (0)
#define __clear_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->nat_entries);
+ do { \
+ ne->checkpointed = true; \
+ list_move_tail(&ne->list, &nm_i->nat_entries); \
+ } while (0)
#define inc_node_version(version) (++version)
static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -68,6 +75,20 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
ni->version = raw_ne->version;
}
+static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
+ struct node_info *ni)
+{
+ raw_ne->ino = cpu_to_le32(ni->ino);
+ raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
+ raw_ne->version = ni->version;
+}
+
+enum mem_type {
+ FREE_NIDS, /* indicates the free nid list */
+ NAT_ENTRIES, /* indicates the cached nat entry */
+ DIRTY_DENTS /* indicates dirty dentry pages */
+};
+
/*
* For free nid mangement
*/
@@ -152,8 +173,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
if (reset)
memset(rn, 0, sizeof(*rn));
rn->footer.nid = cpu_to_le32(nid);
@@ -163,10 +183,8 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
static inline void copy_node_footer(struct page *dst, struct page *src)
{
- void *src_addr = page_address(src);
- void *dst_addr = page_address(dst);
- struct f2fs_node *src_rn = (struct f2fs_node *)src_addr;
- struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr;
+ struct f2fs_node *src_rn = F2FS_NODE(src);
+ struct f2fs_node *dst_rn = F2FS_NODE(dst);
memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
}
@@ -174,45 +192,40 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
+
rn->footer.cp_ver = ckpt->checkpoint_ver;
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
static inline nid_t ino_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
static inline nid_t nid_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.nid);
}
static inline unsigned int ofs_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline unsigned long long cpver_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.next_blkaddr);
}
@@ -229,17 +242,27 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)
* | `- direct node (5 + N => 5 + 2N - 1)
* `- double indirect node (5 + 2N)
* `- indirect node (6 + 2N)
- * `- direct node (x(N + 1))
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + x(N + 1))
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + (N - 1)(N + 1))
+ * `- direct node
*/
static inline bool IS_DNODE(struct page *node_page)
{
unsigned int ofs = ofs_of_node(node_page);
+
+ if (f2fs_has_xattr_block(ofs))
+ return false;
+
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)
return false;
if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
ofs -= 6 + 2 * NIDS_PER_BLOCK;
- if ((long int)ofs % (NIDS_PER_BLOCK + 1))
+ if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
return false;
}
return true;
@@ -247,9 +270,9 @@ static inline bool IS_DNODE(struct page *node_page)
static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+ struct f2fs_node *rn = F2FS_NODE(p);
- wait_on_page_writeback(p);
+ f2fs_wait_on_page_writeback(p, NODE);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
@@ -260,7 +283,8 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
static inline nid_t get_nid(struct page *p, int off, bool i)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
+ struct f2fs_node *rn = F2FS_NODE(p);
+
if (i)
return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
return le32_to_cpu(rn->in.nid[off]);
@@ -272,11 +296,28 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
* - Mark cold node blocks in their node footer
* - Mark cold data pages in page cache
*/
-static inline int is_cold_file(struct inode *inode)
+static inline int is_file(struct inode *inode, int type)
+{
+ return F2FS_I(inode)->i_advise & type;
+}
+
+static inline void set_file(struct inode *inode, int type)
+{
+ F2FS_I(inode)->i_advise |= type;
+}
+
+static inline void clear_file(struct inode *inode, int type)
{
- return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
+ F2FS_I(inode)->i_advise &= ~type;
}
+#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
+#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
+#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
+
static inline int is_cold_data(struct page *page)
{
return PageChecked(page);
@@ -292,33 +333,19 @@ static inline void clear_cold_data(struct page *page)
ClearPageChecked(page);
}
-static inline int is_cold_node(struct page *page)
+static inline int is_node(struct page *page, int type)
{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
- unsigned int flag = le32_to_cpu(rn->footer.flag);
- return flag & (0x1 << COLD_BIT_SHIFT);
+ struct f2fs_node *rn = F2FS_NODE(page);
+ return le32_to_cpu(rn->footer.flag) & (1 << type);
}
-static inline unsigned char is_fsync_dnode(struct page *page)
-{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
- unsigned int flag = le32_to_cpu(rn->footer.flag);
- return flag & (0x1 << FSYNC_BIT_SHIFT);
-}
-
-static inline unsigned char is_dent_dnode(struct page *page)
-{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
- unsigned int flag = le32_to_cpu(rn->footer.flag);
- return flag & (0x1 << DENT_BIT_SHIFT);
-}
+#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT)
+#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
+#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
static inline void set_cold_node(struct inode *inode, struct page *page)
{
- struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
+ struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (S_ISDIR(inode->i_mode))
@@ -328,26 +355,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page)
rn->footer.flag = cpu_to_le32(flag);
}
-static inline void set_fsync_mark(struct page *page, int mark)
-{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
- unsigned int flag = le32_to_cpu(rn->footer.flag);
- if (mark)
- flag |= (0x1 << FSYNC_BIT_SHIFT);
- else
- flag &= ~(0x1 << FSYNC_BIT_SHIFT);
- rn->footer.flag = cpu_to_le32(flag);
-}
-
-static inline void set_dentry_mark(struct page *page, int mark)
+static inline void set_mark(struct page *page, int mark, int type)
{
- void *kaddr = page_address(page);
- struct f2fs_node *rn = (struct f2fs_node *)kaddr;
+ struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
- flag |= (0x1 << DENT_BIT_SHIFT);
+ flag |= (0x1 << type);
else
- flag &= ~(0x1 << DENT_BIT_SHIFT);
+ flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
}
+#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
+#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b235215ac13..a112368a4a8 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,57 +27,93 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
nid_t ino)
{
- struct list_head *this;
struct fsync_inode_entry *entry;
- list_for_each(this, head) {
- entry = list_entry(this, struct fsync_inode_entry, list);
+ list_for_each_entry(entry, head, list)
if (entry->inode->i_ino == ino)
return entry;
- }
+
return NULL;
}
static int recover_dentry(struct page *ipage, struct inode *inode)
{
- struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
- struct f2fs_inode *raw_inode = &(raw_node->i);
- struct qstr name;
+ struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
+ nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
+ struct qstr name;
struct page *page;
- struct inode *dir;
+ struct inode *dir, *einode;
int err = 0;
- if (!is_dent_dnode(ipage))
- goto out;
-
- dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
+ dir = f2fs_iget(inode->i_sb, pino);
if (IS_ERR(dir)) {
- err = -EINVAL;
+ err = PTR_ERR(dir);
goto out;
}
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
+ if (unlikely(name.len > F2FS_NAME_LEN)) {
+ WARN_ON(1);
+ err = -ENAMETOOLONG;
+ goto out_err;
+ }
+retry:
de = f2fs_find_entry(dir, &name, &page);
+ if (de && inode->i_ino == le32_to_cpu(de->ino))
+ goto out_unmap_put;
if (de) {
- kunmap(page);
- f2fs_put_page(page, 0);
+ einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
+ if (IS_ERR(einode)) {
+ WARN_ON(1);
+ err = PTR_ERR(einode);
+ if (err == -ENOENT)
+ err = -EEXIST;
+ goto out_unmap_put;
+ }
+ err = acquire_orphan_inode(F2FS_SB(inode->i_sb));
+ if (err) {
+ iput(einode);
+ goto out_unmap_put;
+ }
+ f2fs_delete_entry(de, page, einode);
+ iput(einode);
+ goto retry;
+ }
+ err = __f2fs_add_link(dir, &name, inode);
+ if (err)
+ goto out_err;
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
+ iput(dir);
} else {
- err = __f2fs_add_link(dir, &name, inode);
+ add_dirty_dir_inode(dir);
+ set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
}
+
+ goto out;
+
+out_unmap_put:
+ kunmap(page);
+ f2fs_put_page(page, 0);
+out_err:
iput(dir);
out:
- kunmap(ipage);
+ f2fs_msg(inode->i_sb, KERN_NOTICE,
+ "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), raw_inode->i_name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
static int recover_inode(struct inode *inode, struct page *node_page)
{
- void *kaddr = page_address(node_page);
- struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
- struct f2fs_inode *raw_inode = &(raw_node->i);
+ struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
+
+ if (!IS_INODE(node_page))
+ return 0;
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_size_write(inode, le64_to_cpu(raw_inode->i_size));
@@ -88,12 +124,17 @@ static int recover_inode(struct inode *inode, struct page *node_page)
inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- return recover_dentry(node_page, inode);
+ if (is_dent_dnode(node_page))
+ return recover_dentry(node_page, inode);
+
+ f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
+ ino_of_node(node_page), raw_inode->i_name);
+ return 0;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
- unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page;
block_t blkaddr;
@@ -101,75 +142,73 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
page = alloc_page(GFP_F2FS_ZERO);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ if (!page)
+ return -ENOMEM;
lock_page(page);
while (1) {
struct fsync_inode_entry *entry;
- if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
- goto out;
+ err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
+ if (err)
+ return err;
+
+ lock_page(page);
if (cp_ver != cpver_of_node(page))
- goto out;
+ break;
if (!is_fsync_dnode(page))
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
if (entry) {
- entry->blkaddr = blkaddr;
if (IS_INODE(page) && is_dent_dnode(page))
set_inode_flag(F2FS_I(entry->inode),
FI_INC_LINK);
} else {
if (IS_INODE(page) && is_dent_dnode(page)) {
- if (recover_inode_page(sbi, page)) {
- err = -ENOMEM;
- goto out;
- }
+ err = recover_inode_page(sbi, page);
+ if (err)
+ break;
}
/* add this fsync inode to the list */
entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
if (!entry) {
err = -ENOMEM;
- goto out;
+ break;
}
entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
if (IS_ERR(entry->inode)) {
err = PTR_ERR(entry->inode);
kmem_cache_free(fsync_entry_slab, entry);
- goto out;
+ break;
}
-
list_add_tail(&entry->list, head);
- entry->blkaddr = blkaddr;
- }
- if (IS_INODE(page)) {
- err = recover_inode(entry->inode, page);
- if (err)
- goto out;
}
+ entry->blkaddr = blkaddr;
+
+ err = recover_inode(entry->inode, page);
+ if (err && err != -ENOENT)
+ break;
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
- ClearPageUptodate(page);
}
-out:
+
unlock_page(page);
__free_pages(page, 0);
+
return err;
}
-static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
- struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head)
{
struct fsync_inode_entry *entry, *tmp;
@@ -180,81 +219,111 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
}
}
-static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
- block_t blkaddr)
+static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+ block_t blkaddr, struct dnode_of_data *dn)
{
struct seg_entry *sentry;
unsigned int segno = GET_SEGNO(sbi, blkaddr);
- unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
- (sbi->blocks_per_seg - 1);
+ unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
- nid_t ino;
- void *kaddr;
+ struct page *sum_page, *node_page;
+ nid_t ino, nid;
struct inode *inode;
- struct page *node_page;
+ unsigned int offset;
block_t bidx;
int i;
sentry = get_seg_entry(sbi, segno);
if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
- return;
+ return 0;
/* Get the previous summary */
for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
- break;
+ goto got_it;
}
}
- if (i > CURSEG_COLD_DATA) {
- struct page *sum_page = get_sum_page(sbi, segno);
- struct f2fs_summary_block *sum_node;
- kaddr = page_address(sum_page);
- sum_node = (struct f2fs_summary_block *)kaddr;
- sum = sum_node->entries[blkoff];
- f2fs_put_page(sum_page, 1);
+
+ sum_page = get_sum_page(sbi, segno);
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ sum = sum_node->entries[blkoff];
+ f2fs_put_page(sum_page, 1);
+got_it:
+ /* Use the locked dnode page and inode */
+ nid = le32_to_cpu(sum.nid);
+ if (dn->inode->i_ino == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.nid = nid;
+ tdn.node_page = dn->inode_page;
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ truncate_data_blocks_range(&tdn, 1);
+ return 0;
+ } else if (dn->nid == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ truncate_data_blocks_range(&tdn, 1);
+ return 0;
}
/* Get the node page */
- node_page = get_node_page(sbi, le32_to_cpu(sum.nid));
- bidx = start_bidx_of_node(ofs_of_node(node_page)) +
- le16_to_cpu(sum.ofs_in_node);
+ node_page = get_node_page(sbi, nid);
+ if (IS_ERR(node_page))
+ return PTR_ERR(node_page);
+
+ offset = ofs_of_node(node_page);
ino = ino_of_node(node_page);
f2fs_put_page(node_page, 1);
/* Deallocate previous index in the node page */
inode = f2fs_iget(sbi->sb, ino);
if (IS_ERR(inode))
- return;
+ return PTR_ERR(inode);
+
+ bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
+ le16_to_cpu(sum.ofs_in_node);
truncate_hole(inode, bidx, bidx + 1);
iput(inode);
+ return 0;
}
-static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
+static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page, block_t blkaddr)
{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int start, end;
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
+ int err = 0, recovered = 0;
- start = start_bidx_of_node(ofs_of_node(page));
- if (IS_INODE(page))
- end = start + ADDRS_PER_INODE;
- else
- end = start + ADDRS_PER_BLOCK;
+ if (recover_inline_data(inode, page))
+ goto out;
+
+ if (recover_xattr_data(inode, page, blkaddr))
+ goto out;
+
+ start = start_bidx_of_node(ofs_of_node(page), fi);
+ end = start + ADDRS_PER_PAGE(page, fi);
+
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&dn, start, 0))
- return;
- wait_on_page_writeback(dn.node_page);
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ if (err) {
+ f2fs_unlock_op(sbi);
+ goto out;
+ }
+
+ f2fs_wait_on_page_writeback(dn.node_page, NODE);
get_node_info(sbi, dn.nid, &ni);
- BUG_ON(ni.ino != ino_of_node(page));
- BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
+ f2fs_bug_on(ni.ino != ino_of_node(page));
+ f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));
for (; start < end; start++) {
block_t src, dest;
@@ -264,19 +333,22 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
if (src == NULL_ADDR) {
- int err = reserve_new_block(&dn);
+ err = reserve_new_block(&dn);
/* We should not get -ENOSPC */
- BUG_ON(err);
+ f2fs_bug_on(err);
}
/* Check the previous node page having this index */
- check_index_in_prev_nodes(sbi, dest);
+ err = check_index_in_prev_nodes(sbi, dest, &dn);
+ if (err)
+ goto err;
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* write dummy data page */
recover_data_page(sbi, NULL, &sum, src, dest);
update_extent_cache(dest, &dn);
+ recovered++;
}
dn.ofs_in_node++;
}
@@ -292,15 +364,23 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
set_page_dirty(dn.node_page);
recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
+err:
f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+out:
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "recover_data: ino = %lx, recovered = %d blocks, err = %d",
+ inode->i_ino, recovered, err);
+ return err;
}
-static void recover_data(struct f2fs_sb_info *sbi,
+static int recover_data(struct f2fs_sb_info *sbi,
struct list_head *head, int type)
{
- unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page;
+ int err = 0;
block_t blkaddr;
/* get node pages in the current segment */
@@ -308,25 +388,31 @@ static void recover_data(struct f2fs_sb_info *sbi,
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
- page = alloc_page(GFP_NOFS | __GFP_ZERO);
- if (IS_ERR(page))
- return;
+ page = alloc_page(GFP_F2FS_ZERO);
+ if (!page)
+ return -ENOMEM;
+
lock_page(page);
while (1) {
struct fsync_inode_entry *entry;
- if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
- goto out;
+ err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
+ if (err)
+ return err;
+
+ lock_page(page);
if (cp_ver != cpver_of_node(page))
- goto out;
+ break;
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry)
goto next;
- do_recover_data(sbi, entry->inode, page, blkaddr);
+ err = do_recover_data(sbi, entry->inode, page, blkaddr);
+ if (err)
+ break;
if (entry->blkaddr == blkaddr) {
iput(entry->inode);
@@ -336,40 +422,48 @@ static void recover_data(struct f2fs_sb_info *sbi,
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
- ClearPageUptodate(page);
}
-out:
+
unlock_page(page);
__free_pages(page, 0);
- allocate_new_segments(sbi);
+ if (!err)
+ allocate_new_segments(sbi);
+ return err;
}
-void recover_fsync_data(struct f2fs_sb_info *sbi)
+int recover_fsync_data(struct f2fs_sb_info *sbi)
{
struct list_head inode_list;
+ int err;
+ bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
- sizeof(struct fsync_inode_entry), NULL);
- if (unlikely(!fsync_entry_slab))
- return;
+ sizeof(struct fsync_inode_entry));
+ if (!fsync_entry_slab)
+ return -ENOMEM;
INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
- if (find_fsync_dnodes(sbi, &inode_list))
+ sbi->por_doing = true;
+ err = find_fsync_dnodes(sbi, &inode_list);
+ if (err)
goto out;
if (list_empty(&inode_list))
goto out;
+ need_writecp = true;
+
/* step #2: recover data */
- sbi->por_doing = 1;
- recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
- sbi->por_doing = 0;
- BUG_ON(!list_empty(&inode_list));
+ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ f2fs_bug_on(!list_empty(&inode_list));
out:
- destroy_fsync_dnodes(sbi, &inode_list);
+ destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
- write_checkpoint(sbi, false);
+ sbi->por_doing = false;
+ if (!err && need_writecp)
+ write_checkpoint(sbi, false);
+ return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 777f17e496e..d04613df710 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,11 +13,164 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
+#include <linux/kthread.h>
#include <linux/vmalloc.h>
+#include <linux/swap.h>
#include "f2fs.h"
#include "segment.h"
#include "node.h"
+#include <trace/events/f2fs.h>
+
+#define __reverse_ffz(x) __reverse_ffs(~(x))
+
+static struct kmem_cache *discard_entry_slab;
+
+/*
+ * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
+ * MSB and LSB are reversed in a byte by f2fs_set_bit.
+ */
+static inline unsigned long __reverse_ffs(unsigned long word)
+{
+ int num = 0;
+
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf0) == 0)
+ num += 4;
+ else
+ word >>= 4;
+ if ((word & 0xc) == 0)
+ num += 2;
+ else
+ word >>= 2;
+ if ((word & 0x2) == 0)
+ num += 1;
+ return num;
+}
+
+/*
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
+ * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * Example:
+ * LSB <--> MSB
+ * f2fs_set_bit(0, bitmap) => 0000 0001
+ * f2fs_set_bit(7, bitmap) => 1000 0000
+ */
+static unsigned long __find_rev_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~0UL << quot;
+ submask = (unsigned char)(0xff << rest) >> rest;
+ submask <<= quot;
+ mask &= submask;
+ tmp &= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG-1)) {
+ tmp = *(p++);
+ if (tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffs(tmp);
+}
+
+static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~(~0UL << quot);
+ submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
+ submask <<= quot;
+ mask += submask;
+ tmp |= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG - 1)) {
+ tmp = *(p++);
+ if (~tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffz(tmp);
+}
/*
* This function balances dirty node and dentry pages.
@@ -35,6 +188,114 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
}
}
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+{
+ /* check the # of cached NAT entries and prefree segments */
+ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
+ excess_prefree_segs(sbi))
+ f2fs_sync_fs(sbi->sb, true);
+}
+
+static int issue_flush_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ wait_queue_head_t *q = &fcc->flush_wait_queue;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list) {
+ fcc->dispatch_list = fcc->issue_list;
+ fcc->issue_list = fcc->issue_tail = NULL;
+ }
+ spin_unlock(&fcc->issue_lock);
+
+ if (fcc->dispatch_list) {
+ struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct flush_cmd *cmd, *next;
+ int ret;
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ ret = submit_bio_wait(WRITE_FLUSH, bio);
+
+ for (cmd = fcc->dispatch_list; cmd; cmd = next) {
+ cmd->ret = ret;
+ next = cmd->next;
+ complete(&cmd->wait);
+ }
+ bio_put(bio);
+ fcc->dispatch_list = NULL;
+ }
+
+ wait_event_interruptible(*q,
+ kthread_should_stop() || fcc->issue_list);
+ goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd cmd;
+
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+ init_completion(&cmd.wait);
+ cmd.next = NULL;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list)
+ fcc->issue_tail->next = &cmd;
+ else
+ fcc->issue_list = &cmd;
+ fcc->issue_tail = &cmd;
+ spin_unlock(&fcc->issue_lock);
+
+ if (!fcc->dispatch_list)
+ wake_up(&fcc->flush_wait_queue);
+
+ wait_for_completion(&cmd.wait);
+
+ return cmd.ret;
+}
+
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct flush_cmd_control *fcc;
+ int err = 0;
+
+ fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+ if (!fcc)
+ return -ENOMEM;
+ spin_lock_init(&fcc->issue_lock);
+ init_waitqueue_head(&fcc->flush_wait_queue);
+ sbi->sm_info->cmd_control_info = fcc;
+ fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(fcc->f2fs_issue_flush)) {
+ err = PTR_ERR(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc =
+ sbi->sm_info->cmd_control_info;
+
+ if (fcc && fcc->f2fs_issue_flush)
+ kthread_stop(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -49,9 +310,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
- dirty_type = sentry->type;
- if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
- dirty_i->nr_dirty[dirty_type]++;
+ enum dirty_type t = sentry->type;
+
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]++;
}
}
@@ -65,12 +327,14 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
- dirty_type = sentry->type;
- if (test_and_clear_bit(segno,
- dirty_i->dirty_segmap[dirty_type]))
- dirty_i->nr_dirty[dirty_type]--;
- clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
- clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
+ enum dirty_type t = sentry->type;
+
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]--;
+
+ if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
+ clear_bit(GET_SECNO(sbi, segno),
+ dirty_i->victim_secmap);
}
}
@@ -79,7 +343,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
* Adding dirty entry into seglist is not critical operation.
* If a given segment is one of current working segments, it won't be added.
*/
-void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
+static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned short valid_blocks;
@@ -102,7 +366,69 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
}
mutex_unlock(&dirty_i->seglist_lock);
- return;
+}
+
+static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
+ block_t blkstart, block_t blklen)
+{
+ sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
+ sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
+ trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+ return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+}
+
+void discard_next_dnode(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ if (f2fs_issue_discard(sbi, blkaddr, 1)) {
+ struct page *page = grab_meta_page(sbi, blkaddr);
+ /* zero-filled page */
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct seg_entry *se)
+{
+ struct list_head *head = &SM_I(sbi)->discard_list;
+ struct discard_entry *new;
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ int max_blocks = sbi->blocks_per_seg;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long dmap[entries];
+ unsigned int start = 0, end = -1;
+ int i;
+
+ if (!test_opt(sbi, DISCARD))
+ return;
+
+ /* zero block will be discarded through the prefree list */
+ if (!se->valid_blocks || se->valid_blocks == max_blocks)
+ return;
+
+ /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
+ for (i = 0; i < entries; i++)
+ dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+
+ while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+ start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+ if (start >= max_blocks)
+ break;
+
+ end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+
+ new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&new->list);
+ new->blkaddr = START_BLOCK(sbi, segno) + start;
+ new->len = end - start;
+
+ list_add_tail(&new->list, head);
+ SM_I(sbi)->nr_discards += end - start;
+ }
}
/*
@@ -111,48 +437,58 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno, offset = 0;
+ unsigned int segno = -1;
unsigned int total_segs = TOTAL_SEGS(sbi);
mutex_lock(&dirty_i->seglist_lock);
while (1) {
segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- offset);
+ segno + 1);
if (segno >= total_segs)
break;
__set_test_and_free(sbi, segno);
- offset = segno + 1;
}
mutex_unlock(&dirty_i->seglist_lock);
}
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
+ struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno, offset = 0;
+ unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int total_segs = TOTAL_SEGS(sbi);
+ unsigned int start = 0, end = -1;
mutex_lock(&dirty_i->seglist_lock);
+
while (1) {
- segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- offset);
- if (segno >= total_segs)
+ int i;
+ start = find_next_bit(prefree_map, total_segs, end + 1);
+ if (start >= total_segs)
break;
+ end = find_next_zero_bit(prefree_map, total_segs, start + 1);
- offset = segno + 1;
- if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
- dirty_i->nr_dirty[PRE]--;
-
- /* Let's use trim */
- if (test_opt(sbi, DISCARD))
- blkdev_issue_discard(sbi->sb->s_bdev,
- START_BLOCK(sbi, segno) <<
- sbi->log_sectors_per_block,
- 1 << (sbi->log_sectors_per_block +
- sbi->log_blocks_per_seg),
- GFP_NOFS, 0);
+ for (i = start; i < end; i++)
+ clear_bit(i, prefree_map);
+
+ dirty_i->nr_dirty[PRE] -= end - start;
+
+ if (!test_opt(sbi, DISCARD))
+ continue;
+
+ f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
+ (end - start) << sbi->log_blocks_per_seg);
}
mutex_unlock(&dirty_i->seglist_lock);
+
+ /* send small discards */
+ list_for_each_entry_safe(entry, this, head, list) {
+ f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+ list_del(&entry->list);
+ SM_I(sbi)->nr_discards -= entry->len;
+ kmem_cache_free(discard_entry_slab, entry);
+ }
}
static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -181,9 +517,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
se = get_seg_entry(sbi, segno);
new_vblocks = se->valid_blocks + del;
- offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
- BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
+ f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
(new_vblocks > sbi->blocks_per_seg)));
se->valid_blocks = new_vblocks;
@@ -210,12 +546,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-static void refresh_sit_entry(struct f2fs_sb_info *sbi,
- block_t old_blkaddr, block_t new_blkaddr)
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
- update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
- update_sit_entry(sbi, old_blkaddr, -1);
+ update_sit_entry(sbi, new, 1);
+ if (GET_SEGNO(sbi, old) != NULL_SEGNO)
+ update_sit_entry(sbi, old, -1);
+
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -223,7 +561,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
unsigned int segno = GET_SEGNO(sbi, addr);
struct sit_info *sit_i = SIT_I(sbi);
- BUG_ON(addr == NULL_ADDR);
+ f2fs_bug_on(addr == NULL_ADDR);
if (addr == NEW_ADDR)
return;
@@ -242,13 +580,12 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
* This function should be resided under the curseg_mutex lock
*/
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
- struct f2fs_summary *sum, unsigned short offset)
+ struct f2fs_summary *sum)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
void *addr = curseg->sum_blk;
- addr += offset * sizeof(struct f2fs_summary);
+ addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
memcpy(addr, sum, sizeof(struct f2fs_summary));
- return;
}
/*
@@ -256,9 +593,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
*/
int npages_for_summary_flush(struct f2fs_sb_info *sbi)
{
- int total_size_bytes = 0;
int valid_sum_count = 0;
- int i, sum_space;
+ int i, sum_in_page;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
if (sbi->ckpt->alloc_type[i] == SSR)
@@ -267,13 +603,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
valid_sum_count += curseg_blkoff(sbi, i);
}
- total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
- + sizeof(struct nat_journal) + 2
- + sizeof(struct sit_journal) + 2;
- sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
- if (total_size_bytes < sum_space)
+ sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+ SUM_FOOTER_SIZE) / SUMMARY_SIZE;
+ if (valid_sum_count <= sum_in_page)
return 1;
- else if (total_size_bytes < 2 * sum_space)
+ else if ((valid_sum_count - sum_in_page) <=
+ (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
return 2;
return 3;
}
@@ -296,48 +631,15 @@ static void write_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
- int ofs_unit, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
- struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
- unsigned int segno, next_segno, i;
- int ofs = 0;
-
- /*
- * If there is not enough reserved sections,
- * we should not reuse prefree segments.
- */
- if (has_not_enough_free_secs(sbi, 0))
- return NULL_SEGNO;
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno = curseg->segno + 1;
+ struct free_segmap_info *free_i = FREE_I(sbi);
- /*
- * NODE page should not reuse prefree segment,
- * since those information is used for SPOR.
- */
- if (IS_NODESEG(type))
- return NULL_SEGNO;
-next:
- segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
- ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
- if (segno < TOTAL_SEGS(sbi)) {
- /* skip intermediate segments in a section */
- if (segno % ofs_unit)
- goto next;
-
- /* skip if whole section is not prefree */
- next_segno = find_next_zero_bit(prefree_segmap,
- TOTAL_SEGS(sbi), segno + 1);
- if (next_segno - segno < ofs_unit)
- goto next;
-
- /* skip if whole section was not free at the last checkpoint */
- for (i = 0; i < ofs_unit; i++)
- if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
- goto next;
- return segno;
- }
- return NULL_SEGNO;
+ if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec)
+ return !test_bit(segno, free_i->free_segmap);
+ return 0;
}
/*
@@ -348,9 +650,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
unsigned int *newseg, bool new_sec, int dir)
{
struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int total_secs = sbi->total_sections;
unsigned int segno, secno, zoneno;
- unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
+ unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = *newseg / sbi->segs_per_sec;
unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
unsigned int left_start = hint;
@@ -363,16 +664,17 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
segno = find_next_zero_bit(free_i->free_segmap,
TOTAL_SEGS(sbi), *newseg + 1);
- if (segno < TOTAL_SEGS(sbi))
+ if (segno - *newseg < sbi->segs_per_sec -
+ (*newseg % sbi->segs_per_sec))
goto got_it;
}
find_other_zone:
- secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
- if (secno >= total_secs) {
+ secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint);
+ if (secno >= TOTAL_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
secno = find_next_zero_bit(free_i->free_secmap,
- total_secs, 0);
- BUG_ON(secno >= total_secs);
+ TOTAL_SECS(sbi), 0);
+ f2fs_bug_on(secno >= TOTAL_SECS(sbi));
} else {
go_left = 1;
left_start = hint - 1;
@@ -387,8 +689,8 @@ find_other_zone:
continue;
}
left_start = find_next_zero_bit(free_i->free_secmap,
- total_secs, 0);
- BUG_ON(left_start >= total_secs);
+ TOTAL_SECS(sbi), 0);
+ f2fs_bug_on(left_start >= TOTAL_SECS(sbi));
break;
}
secno = left_start;
@@ -427,7 +729,7 @@ skip_left:
}
got_it:
/* set it as dirty segment in free segmap */
- BUG_ON(test_bit(segno, free_i->free_segmap));
+ f2fs_bug_on(test_bit(segno, free_i->free_segmap));
__set_inuse(sbi, segno);
*newseg = segno;
write_unlock(&free_i->segmap_lock);
@@ -463,7 +765,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
int dir = ALLOC_LEFT;
write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ GET_SUM_BLOCK(sbi, segno));
if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;
@@ -480,13 +782,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg, block_t start)
{
struct seg_entry *se = get_seg_entry(sbi, seg->segno);
- block_t ofs;
- for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
- if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
- && !f2fs_test_bit(ofs, se->cur_valid_map))
- break;
- }
- seg->next_blkoff = ofs;
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ unsigned long target_map[entries];
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ int i, pos;
+
+ for (i = 0; i < entries; i++)
+ target_map[i] = ckpt_map[i] | cur_map[i];
+
+ pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+
+ seg->next_blkoff = pos;
}
/*
@@ -561,26 +868,19 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
int type, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- unsigned int ofs_unit;
- if (force) {
+ if (force)
new_curseg(sbi, type, true);
- goto out;
- }
-
- ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
- curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
-
- if (curseg->next_segno != NULL_SEGNO)
- change_curseg(sbi, type, false);
else if (type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ new_curseg(sbi, type, false);
else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
-out:
- sbi->segment_count[curseg->alloc_type]++;
+
+ stat_inc_seg_type(sbi, curseg);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -601,121 +901,6 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
-static void f2fs_end_io_write(struct bio *bio, int err)
-{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_private *p = bio->bi_private;
-
- do {
- struct page *page = bvec->bv_page;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
- if (!uptodate) {
- SetPageError(page);
- if (page->mapping)
- set_bit(AS_EIO, &page->mapping->flags);
- set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
- p->sbi->sb->s_flags |= MS_RDONLY;
- }
- end_page_writeback(page);
- dec_page_count(p->sbi, F2FS_WRITEBACK);
- } while (bvec >= bio->bi_io_vec);
-
- if (p->is_sync)
- complete(p->wait);
- kfree(p);
- bio_put(bio);
-}
-
-struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
-{
- struct bio *bio;
- struct bio_private *priv;
-retry:
- priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
- if (!priv) {
- cond_resched();
- goto retry;
- }
-
- /* No failure on bio allocation */
- bio = bio_alloc(GFP_NOIO, npages);
- bio->bi_bdev = bdev;
- bio->bi_private = priv;
- return bio;
-}
-
-static void do_submit_bio(struct f2fs_sb_info *sbi,
- enum page_type type, bool sync)
-{
- int rw = sync ? WRITE_SYNC : WRITE;
- enum page_type btype = type > META ? META : type;
-
- if (type >= META_FLUSH)
- rw = WRITE_FLUSH_FUA;
-
- if (sbi->bio[btype]) {
- struct bio_private *p = sbi->bio[btype]->bi_private;
- p->sbi = sbi;
- sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
- if (type == META_FLUSH) {
- DECLARE_COMPLETION_ONSTACK(wait);
- p->is_sync = true;
- p->wait = &wait;
- submit_bio(rw, sbi->bio[btype]);
- wait_for_completion(&wait);
- } else {
- p->is_sync = false;
- submit_bio(rw, sbi->bio[btype]);
- }
- sbi->bio[btype] = NULL;
- }
-}
-
-void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
-{
- down_write(&sbi->bio_sem);
- do_submit_bio(sbi, type, sync);
- up_write(&sbi->bio_sem);
-}
-
-static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t blk_addr, enum page_type type)
-{
- struct block_device *bdev = sbi->sb->s_bdev;
-
- verify_block_addr(sbi, blk_addr);
-
- down_write(&sbi->bio_sem);
-
- inc_page_count(sbi, F2FS_WRITEBACK);
-
- if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
- do_submit_bio(sbi, type, false);
-alloc_new:
- if (sbi->bio[type] == NULL) {
- sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev));
- sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
- /*
- * The end_io will be assigned at the sumbission phase.
- * Until then, let bio_add_page() merge consecutive IOs as much
- * as possible.
- */
- }
-
- if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
- do_submit_bio(sbi, type, false);
- goto alloc_new;
- }
-
- sbi->last_block_in_bio[type] = blk_addr;
-
- up_write(&sbi->bio_sem);
-}
-
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -756,7 +941,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
if (S_ISDIR(inode->i_mode))
return CURSEG_HOT_DATA;
- else if (is_cold_data(page) || is_cold_file(inode))
+ else if (is_cold_data(page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
else
return CURSEG_WARM_DATA;
@@ -779,20 +964,18 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
return __get_segment_type_4(page, p_type);
}
/* NR_CURSEG_TYPE(6) logs by default */
- BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
+ f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);
return __get_segment_type_6(page, p_type);
}
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t old_blkaddr, block_t *new_blkaddr,
- struct f2fs_summary *sum, enum page_type p_type)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
unsigned int old_cursegno;
- int type;
- type = __get_segment_type(page, p_type);
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
@@ -805,68 +988,82 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
* because, this function updates a summary entry in the
* current summary block.
*/
- __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+ __add_sum_entry(sbi, type, sum);
mutex_lock(&sit_i->sentry_lock);
__refresh_next_blkoff(sbi, curseg);
- sbi->block_count[curseg->alloc_type]++;
+ stat_inc_block_count(sbi, curseg);
+
+ if (!__has_curseg_space(sbi, type))
+ sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
-
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
mutex_unlock(&sit_i->sentry_lock);
- if (p_type == NODE)
+ if (page && IS_NODESEG(type))
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
- /* writeout dirty page into bdev */
- submit_write_page(sbi, page, *new_blkaddr, p_type);
-
mutex_unlock(&curseg->curseg_mutex);
}
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+ int type = __get_segment_type(page, fio->type);
+
+ allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
+ /* writeout dirty page into bdev */
+ f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
+}
+
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = WRITE_SYNC | REQ_META | REQ_PRIO
+ };
+
set_page_writeback(page);
- submit_write_page(sbi, page, page->index, META);
+ f2fs_submit_page_mbio(sbi, page, page->index, &fio);
}
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+ struct f2fs_io_info *fio,
unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
struct f2fs_summary sum;
set_summary(&sum, nid, 0, 0);
- do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
+ do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
}
-void write_data_page(struct inode *inode, struct page *page,
- struct dnode_of_data *dn, block_t old_blkaddr,
- block_t *new_blkaddr)
+void write_data_page(struct page *page, struct dnode_of_data *dn,
+ block_t *new_blkaddr, struct f2fs_io_info *fio)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct f2fs_summary sum;
struct node_info ni;
- BUG_ON(old_blkaddr == NULL_ADDR);
+ f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- do_write_page(sbi, page, old_blkaddr,
- new_blkaddr, &sum, DATA);
+ do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
}
-void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
- block_t old_blk_addr)
+void rewrite_data_page(struct page *page, block_t old_blkaddr,
+ struct f2fs_io_info *fio)
{
- submit_write_page(sbi, page, old_blk_addr, DATA);
+ struct inode *inode = page->mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
}
void recover_data_page(struct f2fs_sb_info *sbi,
@@ -902,14 +1099,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
- __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
+ __add_sum_entry(sbi, type, sum);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
@@ -925,6 +1119,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
unsigned int segno, old_cursegno;
block_t next_blkaddr = next_blkaddr_of_node(page);
unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = WRITE_SYNC,
+ };
curseg = CURSEG_I(sbi, type);
@@ -939,31 +1137,62 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
- __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
+ __add_sum_entry(sbi, type, sum);
/* change the current log to the next block addr in advance */
if (next_segno != segno) {
curseg->next_segno = next_segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
/* rewrite node page */
set_page_writeback(page);
- submit_write_page(sbi, page, new_blkaddr, NODE);
- f2fs_submit_bio(sbi, NODE, true);
+ f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
}
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+ struct page *page, enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ struct bio_vec *bvec;
+ int i;
+
+ down_read(&io->io_rwsem);
+ if (!io->bio)
+ goto out;
+
+ bio_for_each_segment_all(bvec, io->bio, i) {
+ if (page == bvec->bv_page) {
+ up_read(&io->io_rwsem);
+ return true;
+ }
+ }
+
+out:
+ up_read(&io->io_rwsem);
+ return false;
+}
+
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+ if (PageWriteback(page)) {
+ if (is_merged_page(sbi, page, type))
+ f2fs_submit_merged_bio(sbi, type, WRITE);
+ wait_on_page_writeback(page);
+ }
+}
+
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1068,9 +1297,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- if (restore_node_summary(sbi, segno, sum)) {
+ int err;
+
+ err = restore_node_summary(sbi, segno, sum);
+ if (err) {
f2fs_put_page(new, 1);
- return -EINVAL;
+ return err;
}
}
}
@@ -1091,6 +1323,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
int type = CURSEG_HOT_DATA;
+ int err;
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
/* restore for compacted data summary */
@@ -1099,9 +1332,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
type = CURSEG_HOT_NODE;
}
- for (; type <= CURSEG_COLD_NODE; type++)
- if (read_normal_summaries(sbi, type))
- return -EINVAL;
+ for (; type <= CURSEG_COLD_NODE; type++) {
+ err = read_normal_summaries(sbi, type);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -1128,8 +1364,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
SUM_JOURNAL_SIZE);
written_size += SUM_JOURNAL_SIZE;
- set_page_dirty(page);
-
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
@@ -1148,18 +1382,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
summary = (struct f2fs_summary *)(kaddr + written_size);
*summary = seg_i->sum_blk->entries[j];
written_size += SUMMARY_SIZE;
- set_page_dirty(page);
if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
SUM_FOOTER_SIZE)
continue;
+ set_page_dirty(page);
f2fs_put_page(page, 1);
page = NULL;
}
}
- if (page)
+ if (page) {
+ set_page_dirty(page);
f2fs_put_page(page, 1);
+ }
}
static void write_normal_summaries(struct f2fs_sb_info *sbi,
@@ -1191,7 +1427,6 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
- return;
}
int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
@@ -1246,7 +1481,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
/* get current sit block page without lock */
src_page = get_meta_page(sbi, src_off);
dst_page = grab_meta_page(sbi, dst_off);
- BUG_ON(PageDirty(src_page));
+ f2fs_bug_on(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -1278,9 +1513,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
__mark_sit_entry_dirty(sbi, segno);
}
update_sits_in_cursum(sum, -sits_in_cursum(sum));
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
@@ -1315,6 +1550,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
+ /* add discard candidates */
+ if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
+ add_discard_addrs(sbi, segno, se);
+
if (flushed)
goto to_sit_page;
@@ -1390,7 +1629,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
}
if (sbi->segs_per_sec > 1) {
- sit_i->sec_entries = vzalloc(sbi->total_sections *
+ sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) *
sizeof(struct sec_entry));
if (!sit_i->sec_entries)
return -ENOMEM;
@@ -1403,10 +1642,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
- dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
if (!dst_bitmap)
return -ENOMEM;
- memcpy(dst_bitmap, src_bitmap, bitmap_size);
/* init SIT information */
sit_i->s_ops = &default_salloc_ops;
@@ -1442,7 +1680,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
if (!free_i->free_segmap)
return -ENOMEM;
- sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
+ sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi));
free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
if (!free_i->free_secmap)
return -ENOMEM;
@@ -1487,36 +1725,48 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- unsigned int start;
-
- for (start = 0; start < TOTAL_SEGS(sbi); start++) {
- struct seg_entry *se = &sit_i->sentries[start];
- struct f2fs_sit_block *sit_blk;
- struct f2fs_sit_entry sit;
- struct page *page;
- int i;
+ int sit_blk_cnt = SIT_BLK_CNT(sbi);
+ unsigned int i, start, end;
+ unsigned int readed, start_blk = 0;
+ int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < sits_in_cursum(sum); i++) {
- if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
- sit = sit_in_journal(sum, i);
- mutex_unlock(&curseg->curseg_mutex);
- goto got_it;
+ do {
+ readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
+
+ start = start_blk * sit_i->sents_per_block;
+ end = (start_blk + readed) * sit_i->sents_per_block;
+
+ for (; start < end && start < TOTAL_SEGS(sbi); start++) {
+ struct seg_entry *se = &sit_i->sentries[start];
+ struct f2fs_sit_block *sit_blk;
+ struct f2fs_sit_entry sit;
+ struct page *page;
+
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < sits_in_cursum(sum); i++) {
+ if (le32_to_cpu(segno_in_journal(sum, i))
+ == start) {
+ sit = sit_in_journal(sum, i);
+ mutex_unlock(&curseg->curseg_mutex);
+ goto got_it;
+ }
}
- }
- mutex_unlock(&curseg->curseg_mutex);
- page = get_current_sit_page(sbi, start);
- sit_blk = (struct f2fs_sit_block *)page_address(page);
- sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
- f2fs_put_page(page, 1);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ page = get_current_sit_page(sbi, start);
+ sit_blk = (struct f2fs_sit_block *)page_address(page);
+ sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+ f2fs_put_page(page, 1);
got_it:
- check_block_count(sbi, start, &sit);
- seg_info_from_raw_sit(se, &sit);
- if (sbi->segs_per_sec > 1) {
- struct sec_entry *e = get_sec_entry(sbi, start);
- e->valid_blocks += se->valid_blocks;
+ check_block_count(sbi, start, &sit);
+ seg_info_from_raw_sit(se, &sit);
+ if (sbi->segs_per_sec > 1) {
+ struct sec_entry *e = get_sec_entry(sbi, start);
+ e->valid_blocks += se->valid_blocks;
+ }
}
- }
+ start_blk += readed;
+ } while (start_blk < sit_blk_cnt);
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -1541,13 +1791,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int segno = 0, offset = 0;
+ unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi);
unsigned short valid_blocks;
- while (segno < TOTAL_SEGS(sbi)) {
+ while (1) {
/* find dirty segment based on free segmap */
- segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
- if (segno >= TOTAL_SEGS(sbi))
+ segno = find_next_inuse(free_i, total_segs, offset);
+ if (segno >= total_segs)
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, 0);
@@ -1559,14 +1809,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
}
}
-static int init_victim_segmap(struct f2fs_sb_info *sbi)
+static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
+ unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi));
- dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
- dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
+ dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!dirty_i->victim_secmap)
return -ENOMEM;
return 0;
}
@@ -1593,7 +1842,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
}
init_dirty_segmap(sbi);
- return init_victim_segmap(sbi);
+ return init_victim_secmap(sbi);
}
/*
@@ -1637,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
/* init sm info */
sbi->sm_info = sm_info;
- INIT_LIST_HEAD(&sm_info->wblist_head);
- spin_lock_init(&sm_info->wblist_lock);
sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
@@ -1646,6 +1893,20 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ sm_info->rec_prefree_segments = sm_info->main_segments *
+ DEF_RECLAIM_PREFREE_SEGMENTS / 100;
+ sm_info->ipu_policy = F2FS_IPU_DISABLE;
+ sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
+
+ INIT_LIST_HEAD(&sm_info->discard_list);
+ sm_info->nr_discards = 0;
+ sm_info->max_discards = 0;
+
+ if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ return err;
+ }
err = build_sit_info(sbi);
if (err)
@@ -1680,18 +1941,10 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
mutex_unlock(&dirty_i->seglist_lock);
}
-void reset_victim_segmap(struct f2fs_sb_info *sbi)
-{
- unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
- memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
-}
-
-static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
+static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-
- kfree(dirty_i->victim_segmap[FG_GC]);
- kfree(dirty_i->victim_segmap[BG_GC]);
+ kfree(dirty_i->victim_secmap);
}
static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
@@ -1706,7 +1959,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_DIRTY_TYPE; i++)
discard_dirty_segmap(sbi, i);
- destroy_victim_segmap(sbi);
+ destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
kfree(dirty_i);
}
@@ -1761,6 +2014,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
+
+ if (!sm_info)
+ return;
+ destroy_flush_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -1768,3 +2025,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
sbi->sm_info = NULL;
kfree(sm_info);
}
+
+int __init create_segment_manager_caches(void)
+{
+ discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+ sizeof(struct discard_entry));
+ if (!discard_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void destroy_segment_manager_caches(void)
+{
+ kmem_cache_destroy(discard_entry_slab);
+}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 552dadbb232..7091204680f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -8,28 +8,28 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/blkdev.h>
+
/* constant macro */
#define NULL_SEGNO ((unsigned int)(~0))
+#define NULL_SECNO ((unsigned int)(~0))
+
+#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
-/* V: Logical segment # in volume, R: Relative segment # in main area */
+/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
-#define IS_DATASEG(t) \
- ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \
- (t == CURSEG_WARM_DATA))
+#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
+#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
-#define IS_NODESEG(t) \
- ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
- (t == CURSEG_WARM_NODE))
-
-#define IS_CURSEG(sbi, segno) \
- ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
- (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
- (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
- (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
- (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+#define IS_CURSEG(sbi, seg) \
+ ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
#define IS_CURSEC(sbi, secno) \
((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -57,6 +57,9 @@
((blk_addr) - SM_I(sbi)->seg0_blkaddr)
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
+#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
@@ -78,19 +81,19 @@
(segno / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(sit_i, segno) \
(SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
+#define SIT_BLK_CNT(sbi) \
+ ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
+#define TOTAL_SECS(sbi) (sbi->total_sections)
#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
- (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
-
-/* during checkpoint, bio_private is used to synchronize the last bio */
-struct bio_private {
- struct f2fs_sb_info *sbi;
- bool is_sync;
- void *wait;
-};
+ (((sector_t)blk_addr) << (sbi)->log_sectors_per_block)
+#define SECTOR_TO_BLOCK(sbi, sectors) \
+ (sectors >> (sbi)->log_sectors_per_block)
+#define MAX_BIO_BLOCKS(max_hw_blocks) \
+ (min((int)max_hw_blocks, BIO_MAX_PAGES))
/*
* indicate a block allocation direction: RIGHT and LEFT.
@@ -136,6 +139,7 @@ struct victim_sel_policy {
int alloc_mode; /* LFS or SSR */
int gc_mode; /* GC_CB or GC_GREEDY */
unsigned long *dirty_segmap; /* dirty segment bitmap */
+ unsigned int max_search; /* maximum # of segments to search */
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
@@ -213,7 +217,7 @@ struct dirty_seglist_info {
unsigned long *dirty_segmap[NR_DIRTY_TYPE];
struct mutex seglist_lock; /* lock for segment bitmaps */
int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
- unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */
+ unsigned long *victim_secmap; /* background GC victims */
};
/* victim selection function for cleaning and SSR */
@@ -376,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
static inline block_t written_block_count(struct f2fs_sb_info *sbi)
{
- struct sit_info *sit_i = SIT_I(sbi);
- block_t vblocks;
-
- mutex_lock(&sit_i->sentry_lock);
- vblocks = sit_i->written_valid_blocks;
- mutex_unlock(&sit_i->sentry_lock);
-
- return vblocks;
+ return SIT_I(sbi)->written_valid_blocks;
}
static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_segs;
-
- read_lock(&free_i->segmap_lock);
- free_segs = free_i->free_segments;
- read_unlock(&free_i->segmap_lock);
-
- return free_segs;
+ return FREE_I(sbi)->free_segments;
}
static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -405,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_secs;
-
- read_lock(&free_i->segmap_lock);
- free_secs = free_i->free_sections;
- read_unlock(&free_i->segmap_lock);
-
- return free_secs;
+ return FREE_I(sbi)->free_sections;
}
static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -447,7 +430,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
- return (free_sections(sbi) < overprovision_sections(sbi));
+ return (prefree_segments(sbi) / sbi->segs_per_sec)
+ + free_sections(sbi) < overprovision_sections(sbi);
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -455,34 +439,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
- if (sbi->por_doing)
+ if (unlikely(sbi->por_doing))
return false;
- return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
- reserved_sections(sbi)));
+ return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
+ reserved_sections(sbi));
+}
+
+static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
+{
+ return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
}
static inline int utilization(struct f2fs_sb_info *sbi)
{
- return (long int)valid_user_blocks(sbi) * 100 /
- (long int)sbi->user_block_count;
+ return div_u64((u64)valid_user_blocks(sbi) * 100,
+ sbi->user_block_count);
}
/*
* Sometimes f2fs may be better to drop out-of-place update policy.
- * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write
- * data in the original place likewise other traditional file systems.
- * But, currently set 100 in percentage, which means it is disabled.
- * See below need_inplace_update().
+ * And, users can control the policy through sysfs entries.
+ * There are five policies with triggering conditions as follows.
+ * F2FS_IPU_FORCE - all the time,
+ * F2FS_IPU_SSR - if SSR mode is activated,
+ * F2FS_IPU_UTIL - if FS utilization is over threashold,
+ * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
+ * threashold,
+ * F2FS_IPUT_DISABLE - disable IPU. (=default option)
*/
-#define MIN_IPU_UTIL 100
+#define DEF_MIN_IPU_UTIL 70
+
+enum {
+ F2FS_IPU_FORCE,
+ F2FS_IPU_SSR,
+ F2FS_IPU_UTIL,
+ F2FS_IPU_SSR_UTIL,
+ F2FS_IPU_DISABLE,
+};
+
static inline bool need_inplace_update(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+ /* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode))
return false;
- if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
+
+ switch (SM_I(sbi)->ipu_policy) {
+ case F2FS_IPU_FORCE:
return true;
+ case F2FS_IPU_SSR:
+ if (need_SSR(sbi))
+ return true;
+ break;
+ case F2FS_IPU_UTIL:
+ if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+ break;
+ case F2FS_IPU_SSR_UTIL:
+ if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+ break;
+ case F2FS_IPU_DISABLE:
+ break;
+ }
return false;
}
@@ -506,16 +527,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
return curseg->next_blkoff;
}
+#ifdef CONFIG_F2FS_CHECK_FS
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
unsigned int end_segno = SM_I(sbi)->segment_count - 1;
BUG_ON(segno > end_segno);
}
-/*
- * This function is used for only debugging.
- * NOTE: In future, we have to remove this function.
- */
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
@@ -534,8 +552,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
unsigned int end_segno = sm_info->segment_count - 1;
+ bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
- int i;
+ int cur_pos = 0, next_pos;
/* check segment usage */
BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
@@ -544,11 +563,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
BUG_ON(segno > end_segno);
/* check bitmap with valid block count */
- for (i = 0; i < sbi->blocks_per_seg; i++)
- if (f2fs_test_bit(i, raw_sit->valid_map))
- valid_blocks++;
+ do {
+ if (is_valid) {
+ next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ valid_blocks += next_pos - cur_pos;
+ } else
+ next_pos = find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ cur_pos = next_pos;
+ is_valid = !is_valid;
+ } while (cur_pos < sbi->blocks_per_seg);
BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
}
+#else
+#define check_seg_range(sbi, segno)
+#define verify_block_addr(sbi, blk_addr)
+#define check_block_count(sbi, segno, raw_sit)
+#endif
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
unsigned int start)
@@ -616,3 +650,60 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
- (base + 1) + type;
}
+
+static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
+{
+ if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
+ return true;
+ return false;
+}
+
+static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
+}
+
+/*
+ * It is very important to gather dirty pages and write at once, so that we can
+ * submit a big bio without interfering other data writes.
+ * By default, 512 pages for directory data,
+ * 512 pages (2MB) * 3 for three types of nodes, and
+ * max_bio_blocks for meta are set.
+ */
+static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
+{
+ if (type == DATA)
+ return sbi->blocks_per_seg;
+ else if (type == NODE)
+ return 3 * sbi->blocks_per_seg;
+ else if (type == META)
+ return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+ else
+ return 0;
+}
+
+/*
+ * When writing pages, it'd better align nr_to_write for segment size.
+ */
+static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
+ struct writeback_control *wbc)
+{
+ long nr_to_write, desired;
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ return 0;
+
+ nr_to_write = wbc->nr_to_write;
+
+ if (type == DATA)
+ desired = 4096;
+ else if (type == NODE)
+ desired = 3 * max_hw_blocks(sbi);
+ else
+ desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ wbc->nr_to_write = desired;
+ return desired - nr_to_write;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fea6e582a2e..8f96d9372ad 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -12,47 +12,213 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/statfs.h>
-#include <linux/proc_fs.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
+#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
+#include <linux/sysfs.h>
#include "f2fs.h"
#include "node.h"
+#include "segment.h"
#include "xattr.h"
+#include "gc.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/f2fs.h>
+static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep;
+static struct kset *f2fs_kset;
enum {
- Opt_gc_background_off,
+ Opt_gc_background,
Opt_disable_roll_forward,
Opt_discard,
Opt_noheap,
+ Opt_user_xattr,
Opt_nouser_xattr,
+ Opt_acl,
Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
+ Opt_inline_xattr,
+ Opt_inline_data,
+ Opt_flush_merge,
Opt_err,
};
static match_table_t f2fs_tokens = {
- {Opt_gc_background_off, "background_gc_off"},
+ {Opt_gc_background, "background_gc=%s"},
{Opt_disable_roll_forward, "disable_roll_forward"},
{Opt_discard, "discard"},
{Opt_noheap, "no_heap"},
+ {Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
+ {Opt_inline_xattr, "inline_xattr"},
+ {Opt_inline_data, "inline_data"},
+ {Opt_flush_merge, "flush_merge"},
{Opt_err, NULL},
};
+/* Sysfs support for f2fs */
+enum {
+ GC_THREAD, /* struct f2fs_gc_thread */
+ SM_INFO, /* struct f2fs_sm_info */
+ NM_INFO, /* struct f2fs_nm_info */
+ F2FS_SBI, /* struct f2fs_sb_info */
+};
+
+struct f2fs_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+ ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+ const char *, size_t);
+ int struct_type;
+ int offset;
+};
+
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+ if (struct_type == GC_THREAD)
+ return (unsigned char *)sbi->gc_thread;
+ else if (struct_type == SM_INFO)
+ return (unsigned char *)SM_I(sbi);
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
+ else if (struct_type == F2FS_SBI)
+ return (unsigned char *)sbi;
+ return NULL;
+}
+
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ unsigned char *ptr = NULL;
+ unsigned int *ui;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ unsigned char *ptr;
+ unsigned long t;
+ unsigned int *ui;
+ ssize_t ret;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret < 0)
+ return ret;
+ *ui = t;
+ return count;
+}
+
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->show ? a->show(a, sbi, buf) : 0;
+}
+
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->store ? a->store(a, sbi, buf, len) : 0;
+}
+
+static void f2fs_sb_release(struct kobject *kobj)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ complete(&sbi->s_kobj_unregister);
+}
+
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+static struct f2fs_attr f2fs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ .struct_type = _struct_type, \
+ .offset = _offset \
+}
+
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
+ F2FS_ATTR_OFFSET(struct_type, name, 0644, \
+ f2fs_sbi_show, f2fs_sbi_store, \
+ offsetof(struct struct_name, elname))
+
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+static struct attribute *f2fs_attrs[] = {
+ ATTR_LIST(gc_min_sleep_time),
+ ATTR_LIST(gc_max_sleep_time),
+ ATTR_LIST(gc_no_gc_sleep_time),
+ ATTR_LIST(gc_idle),
+ ATTR_LIST(reclaim_segments),
+ ATTR_LIST(max_small_discards),
+ ATTR_LIST(ipu_policy),
+ ATTR_LIST(min_ipu_util),
+ ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
+ NULL,
+};
+
+static const struct sysfs_ops f2fs_attr_ops = {
+ .show = f2fs_attr_show,
+ .store = f2fs_attr_store,
+};
+
+static struct kobj_type f2fs_ktype = {
+ .default_attrs = f2fs_attrs,
+ .sysfs_ops = &f2fs_attr_ops,
+ .release = f2fs_sb_release,
+};
+
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
struct va_format vaf;
@@ -72,28 +238,170 @@ static void init_once(void *foo)
inode_init_once(&fi->vfs_inode);
}
+static int parse_options(struct super_block *sb, char *options)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p, *name;
+ int arg = 0;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+ if (!*p)
+ continue;
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].to = args[0].from = NULL;
+ token = match_token(p, f2fs_tokens, args);
+
+ switch (token) {
+ case Opt_gc_background:
+ name = match_strdup(&args[0]);
+
+ if (!name)
+ return -ENOMEM;
+ if (strlen(name) == 2 && !strncmp(name, "on", 2))
+ set_opt(sbi, BG_GC);
+ else if (strlen(name) == 3 && !strncmp(name, "off", 3))
+ clear_opt(sbi, BG_GC);
+ else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_disable_roll_forward:
+ set_opt(sbi, DISABLE_ROLL_FORWARD);
+ break;
+ case Opt_discard:
+ set_opt(sbi, DISCARD);
+ break;
+ case Opt_noheap:
+ set_opt(sbi, NOHEAP);
+ break;
+#ifdef CONFIG_F2FS_FS_XATTR
+ case Opt_user_xattr:
+ set_opt(sbi, XATTR_USER);
+ break;
+ case Opt_nouser_xattr:
+ clear_opt(sbi, XATTR_USER);
+ break;
+ case Opt_inline_xattr:
+ set_opt(sbi, INLINE_XATTR);
+ break;
+#else
+ case Opt_user_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "user_xattr options not supported");
+ break;
+ case Opt_nouser_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "nouser_xattr options not supported");
+ break;
+ case Opt_inline_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "inline_xattr options not supported");
+ break;
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ case Opt_acl:
+ set_opt(sbi, POSIX_ACL);
+ break;
+ case Opt_noacl:
+ clear_opt(sbi, POSIX_ACL);
+ break;
+#else
+ case Opt_acl:
+ f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ break;
+ case Opt_noacl:
+ f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ break;
+#endif
+ case Opt_active_logs:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+ return -EINVAL;
+ sbi->active_logs = arg;
+ break;
+ case Opt_disable_ext_identify:
+ set_opt(sbi, DISABLE_EXT_IDENTIFY);
+ break;
+ case Opt_inline_data:
+ set_opt(sbi, INLINE_DATA);
+ break;
+ case Opt_flush_merge:
+ set_opt(sbi, FLUSH_MERGE);
+ break;
+ default:
+ f2fs_msg(sb, KERN_ERR,
+ "Unrecognized mount option \"%s\" or missing value",
+ p);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
struct f2fs_inode_info *fi;
- fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
+ fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
if (!fi)
return NULL;
init_once((void *) fi);
- /* Initilize f2fs-specific inode info */
+ /* Initialize f2fs-specific inode info */
fi->vfs_inode.i_version = 1;
atomic_set(&fi->dirty_dents, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
+ init_rwsem(&fi->i_sem);
set_inode_flag(fi, FI_NEW_INODE);
+ if (test_opt(F2FS_SB(sb), INLINE_XATTR))
+ set_inode_flag(fi, FI_INLINE_XATTR);
+
+ /* Will be used by directory only */
+ fi->i_dir_level = F2FS_SB(sb)->dir_level;
+
return &fi->vfs_inode;
}
+static int f2fs_drop_inode(struct inode *inode)
+{
+ /*
+ * This is to avoid a deadlock condition like below.
+ * writeback_single_inode(inode)
+ * - f2fs_write_data_page
+ * - f2fs_gc -> iput -> evict
+ * - inode_wait_for_writeback(inode)
+ */
+ if (!inode_unhashed(inode) && inode->i_state & I_SYNC)
+ return 0;
+ return generic_drop_inode(inode);
+}
+
+/*
+ * f2fs_dirty_inode() is called from __mark_inode_dirty()
+ *
+ * We should call set_dirty_inode to write the dirty inode through write_inode.
+ */
+static void f2fs_dirty_inode(struct inode *inode, int flags)
+{
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+}
+
static void f2fs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -109,10 +417,18 @@ static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ kobject_del(&sbi->s_kobj);
+
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
- write_checkpoint(sbi, true);
+ /* We don't need to do checkpoint when it's clean */
+ if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
+ write_checkpoint(sbi, true);
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -122,6 +438,8 @@ static void f2fs_put_super(struct super_block *sb)
destroy_segment_manager(sbi);
kfree(sbi->ckpt);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
brelse(sbi->raw_super_buf);
@@ -132,13 +450,18 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ trace_f2fs_sync_fs(sb, sync);
+
if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
return 0;
- if (sync)
+ if (sync) {
+ mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, false);
- else
+ mutex_unlock(&sbi->gc_mutex);
+ } else {
f2fs_balance_fs(sbi);
+ }
return 0;
}
@@ -147,7 +470,7 @@ static int f2fs_freeze(struct super_block *sb)
{
int err;
- if (sb->s_flags & MS_RDONLY)
+ if (f2fs_readonly(sb))
return 0;
err = f2fs_sync_fs(sb, 1);
@@ -180,7 +503,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = sbi->total_node_count;
buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
- buf->f_namelen = F2FS_MAX_NAME_LEN;
+ buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
@@ -191,10 +514,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
- if (test_opt(sbi, BG_GC))
- seq_puts(seq, ",background_gc_on");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
+ seq_printf(seq, ",background_gc=%s", "on");
else
- seq_puts(seq, ",background_gc_off");
+ seq_printf(seq, ",background_gc=%s", "off");
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
@@ -206,6 +529,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",user_xattr");
else
seq_puts(seq, ",nouser_xattr");
+ if (test_opt(sbi, INLINE_XATTR))
+ seq_puts(seq, ",inline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -215,16 +540,139 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
#endif
if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
seq_puts(seq, ",disable_ext_identify");
-
+ if (test_opt(sbi, INLINE_DATA))
+ seq_puts(seq, ",inline_data");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+ seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
}
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned int total_segs =
+ le32_to_cpu(sbi->raw_super->segment_count_main);
+ int i;
+
+ seq_puts(seq, "format: segment_type|valid_blocks\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+ for (i = 0; i < total_segs; i++) {
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-5d", i);
+ seq_printf(seq, "%d|%-3u", se->type,
+ get_valid_blocks(sbi, i, 1));
+ if ((i % 10) == 9 || i == (total_segs - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+
+ return 0;
+}
+
+static int segment_info_open_fs(struct inode *inode, struct file *file)
+{
+ return single_open(file, segment_info_seq_show, PDE_DATA(inode));
+}
+
+static const struct file_operations f2fs_seq_segment_info_fops = {
+ .owner = THIS_MODULE,
+ .open = segment_info_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_mount_info org_mount_opt;
+ int err, active_logs;
+ bool need_restart_gc = false;
+ bool need_stop_gc = false;
+
+ sync_filesystem(sb);
+
+ /*
+ * Save the old mount options in case we
+ * need to restore them.
+ */
+ org_mount_opt = sbi->mount_opt;
+ active_logs = sbi->active_logs;
+
+ /* parse mount options */
+ err = parse_options(sb, data);
+ if (err)
+ goto restore_opts;
+
+ /*
+ * Previous and new state of filesystem is RO,
+ * so skip checking GC and FLUSH_MERGE conditions.
+ */
+ if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ goto skip;
+
+ /*
+ * We stop the GC thread if FS is mounted as RO
+ * or if background_gc = off is passed in mount
+ * option. Also sync the filesystem.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
+ if (sbi->gc_thread) {
+ stop_gc_thread(sbi);
+ f2fs_sync_fs(sb, 1);
+ need_restart_gc = true;
+ }
+ } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
+ err = start_gc_thread(sbi);
+ if (err)
+ goto restore_opts;
+ need_stop_gc = true;
+ }
+
+ /*
+ * We stop issue flush thread if FS is mounted as RO
+ * or if flush_merge is not passed in mount option.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ destroy_flush_cmd_control(sbi);
+ } else if (test_opt(sbi, FLUSH_MERGE) &&
+ !sbi->sm_info->cmd_control_info) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ goto restore_gc;
+ }
+skip:
+ /* Update the POSIXACL Flag */
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ return 0;
+restore_gc:
+ if (need_restart_gc) {
+ if (start_gc_thread(sbi))
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "background gc thread is stop");
+ } else if (need_stop_gc) {
+ stop_gc_thread(sbi);
+ }
+restore_opts:
+ sbi->mount_opt = org_mount_opt;
+ sbi->active_logs = active_logs;
+ return err;
+}
+
static struct super_operations f2fs_sops = {
.alloc_inode = f2fs_alloc_inode,
+ .drop_inode = f2fs_drop_inode,
.destroy_inode = f2fs_destroy_inode,
.write_inode = f2fs_write_inode,
+ .dirty_inode = f2fs_dirty_inode,
.show_options = f2fs_show_options,
.evict_inode = f2fs_evict_inode,
.put_super = f2fs_put_super,
@@ -232,6 +680,7 @@ static struct super_operations f2fs_sops = {
.freeze_fs = f2fs_freeze,
.unfreeze_fs = f2fs_unfreeze,
.statfs = f2fs_statfs,
+ .remount_fs = f2fs_remount,
};
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
@@ -240,7 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (ino < F2FS_ROOT_INO(sbi))
+ if (check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
@@ -251,7 +700,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
- if (generation && inode->i_generation != generation) {
+ if (unlikely(generation && inode->i_generation != generation)) {
/* we didn't find the right inode.. */
iput(inode);
return ERR_PTR(-ESTALE);
@@ -279,82 +728,9 @@ static const struct export_operations f2fs_export_ops = {
.get_parent = f2fs_get_parent,
};
-static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
- char *options)
-{
- substring_t args[MAX_OPT_ARGS];
- char *p;
- int arg = 0;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, f2fs_tokens, args);
-
- switch (token) {
- case Opt_gc_background_off:
- clear_opt(sbi, BG_GC);
- break;
- case Opt_disable_roll_forward:
- set_opt(sbi, DISABLE_ROLL_FORWARD);
- break;
- case Opt_discard:
- set_opt(sbi, DISCARD);
- break;
- case Opt_noheap:
- set_opt(sbi, NOHEAP);
- break;
-#ifdef CONFIG_F2FS_FS_XATTR
- case Opt_nouser_xattr:
- clear_opt(sbi, XATTR_USER);
- break;
-#else
- case Opt_nouser_xattr:
- f2fs_msg(sb, KERN_INFO,
- "nouser_xattr options not supported");
- break;
-#endif
-#ifdef CONFIG_F2FS_FS_POSIX_ACL
- case Opt_noacl:
- clear_opt(sbi, POSIX_ACL);
- break;
-#else
- case Opt_noacl:
- f2fs_msg(sb, KERN_INFO, "noacl options not supported");
- break;
-#endif
- case Opt_active_logs:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
- return -EINVAL;
- sbi->active_logs = arg;
- break;
- case Opt_disable_ext_identify:
- set_opt(sbi, DISABLE_EXT_IDENTIFY);
- break;
- default:
- f2fs_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" or missing value",
- p);
- return -EINVAL;
- }
- }
- return 0;
-}
-
static loff_t max_file_size(unsigned bits)
{
- loff_t result = ADDRS_PER_INODE;
+ loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
loff_t leaf_count = ADDRS_PER_BLOCK;
/* two direct node blocks */
@@ -427,10 +803,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
- if (fsmeta >= total)
+ if (unlikely(fsmeta >= total))
return 1;
- if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+ if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
}
@@ -457,35 +833,57 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
+ sbi->cur_victim_sec = NULL_SECNO;
+ sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
+
+ sbi->dir_level = DEF_DIR_LEVEL;
}
-static int validate_superblock(struct super_block *sb,
- struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf, sector_t block)
+/*
+ * Read f2fs raw super block.
+ * Because we have two copies of super block, so read the first one at first,
+ * if the first one is invalid, move to read the second one.
+ */
+static int read_raw_super_block(struct super_block *sb,
+ struct f2fs_super_block **raw_super,
+ struct buffer_head **raw_super_buf)
{
- const char *super = (block == 0 ? "first" : "second");
+ int block = 0;
- /* read f2fs raw super block */
+retry:
*raw_super_buf = sb_bread(sb, block);
if (!*raw_super_buf) {
- f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
- super);
- return 1;
+ f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EIO;
+ }
}
*raw_super = (struct f2fs_super_block *)
((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (!sanity_check_raw_super(sb, *raw_super))
- return 0;
+ if (sanity_check_raw_super(sb, *raw_super)) {
+ brelse(*raw_super_buf);
+ f2fs_msg(sb, KERN_ERR,
+ "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EINVAL;
+ }
+ }
- f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
- "in %s superblock", super);
- return 1;
+ return 0;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -503,16 +901,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
/* set a block size */
- if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
+ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
goto free_sbi;
}
- if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) {
- brelse(raw_super_buf);
- if (validate_superblock(sb, &raw_super, &raw_super_buf, 1))
- goto free_sb_buf;
- }
+ err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+ if (err)
+ goto free_sbi;
+
+ sb->s_fs_info = sbi;
/* init some FS parameters */
sbi->active_logs = NR_CURSEG_TYPE;
@@ -525,7 +923,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi, POSIX_ACL);
#endif
/* parse mount options */
- if (parse_options(sb, sbi, (char *)data))
+ err = parse_options(sb, (char *)data);
+ if (err)
goto free_sb_buf;
sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
@@ -536,7 +935,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
- sb->s_fs_info = sbi;
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -547,14 +945,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->raw_super = raw_super;
sbi->raw_super_buf = raw_super_buf;
mutex_init(&sbi->gc_mutex);
- mutex_init(&sbi->write_inode);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- for (i = 0; i < NR_LOCK_TYPE; i++)
- mutex_init(&sbi->fs_lock[i]);
- sbi->por_doing = 0;
+ mutex_init(&sbi->node_write);
+ sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
- init_rwsem(&sbi->bio_sem);
+
+ init_rwsem(&sbi->read_io.io_rwsem);
+ sbi->read_io.sbi = sbi;
+ sbi->read_io.bio = NULL;
+ for (i = 0; i < NR_PAGE_TYPE; i++) {
+ init_rwsem(&sbi->write_io[i].io_rwsem);
+ sbi->write_io[i].sbi = sbi;
+ sbi->write_io[i].bio = NULL;
+ }
+
+ init_rwsem(&sbi->cp_rwsem);
+ init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
/* get an inode for meta space */
@@ -617,9 +1024,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
}
/* if there are nt orphan nodes free them */
- err = -EINVAL;
- if (recover_orphan_inodes(sbi))
- goto free_node_inode;
+ recover_orphan_inodes(sbi);
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@@ -628,8 +1033,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = PTR_ERR(root);
goto free_node_inode;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ err = -EINVAL;
goto free_root_inode;
+ }
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
@@ -637,22 +1044,60 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
}
- /* recover fsynced data */
- if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
- recover_fsync_data(sbi);
-
- /* After POR, we can run background GC thread */
- err = start_gc_thread(sbi);
+ err = f2fs_build_stats(sbi);
if (err)
- goto fail;
+ goto free_root_inode;
- err = f2fs_build_stats(sbi);
+ if (f2fs_proc_root)
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+ if (sbi->s_proc)
+ proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_segment_info_fops, sb);
+
+ if (test_opt(sbi, DISCARD)) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ if (!blk_queue_discard(q))
+ f2fs_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ }
+
+ sbi->s_kobj.kset = f2fs_kset;
+ init_completion(&sbi->s_kobj_unregister);
+ err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+ "%s", sb->s_id);
if (err)
- goto fail;
+ goto free_proc;
+ /* recover fsynced data */
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ err = recover_fsync_data(sbi);
+ if (err)
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot recover all fsync data errno=%ld", err);
+ }
+
+ /*
+ * If filesystem is not mounted as read-only then
+ * do start the gc_thread.
+ */
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /* After POR, we can run background GC thread.*/
+ err = start_gc_thread(sbi);
+ if (err)
+ goto free_kobj;
+ }
return 0;
-fail:
- stop_gc_thread(sbi);
+
+free_kobj:
+ kobject_del(&sbi->s_kobj);
+free_proc:
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ f2fs_destroy_stats(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
@@ -692,8 +1137,8 @@ MODULE_ALIAS_FS("f2fs");
static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
- sizeof(struct f2fs_inode_info), NULL);
- if (f2fs_inode_cachep == NULL)
+ sizeof(struct f2fs_inode_info));
+ if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
}
@@ -717,29 +1162,55 @@ static int __init init_f2fs_fs(void)
goto fail;
err = create_node_manager_caches();
if (err)
- goto fail;
+ goto free_inodecache;
+ err = create_segment_manager_caches();
+ if (err)
+ goto free_node_manager_caches;
err = create_gc_caches();
if (err)
- goto fail;
+ goto free_segment_manager_caches;
err = create_checkpoint_caches();
if (err)
- goto fail;
+ goto free_gc_caches;
+ f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+ if (!f2fs_kset) {
+ err = -ENOMEM;
+ goto free_checkpoint_caches;
+ }
err = register_filesystem(&f2fs_fs_type);
if (err)
- goto fail;
+ goto free_kset;
f2fs_create_root_stats();
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+ return 0;
+
+free_kset:
+ kset_unregister(f2fs_kset);
+free_checkpoint_caches:
+ destroy_checkpoint_caches();
+free_gc_caches:
+ destroy_gc_caches();
+free_segment_manager_caches:
+ destroy_segment_manager_caches();
+free_node_manager_caches:
+ destroy_node_manager_caches();
+free_inodecache:
+ destroy_inodecache();
fail:
return err;
}
static void __exit exit_f2fs_fs(void)
{
+ remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
+ destroy_segment_manager_caches();
destroy_node_manager_caches();
destroy_inodecache();
+ kset_unregister(f2fs_kset);
}
module_init(init_f2fs_fs)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8038c049650..8bea941ee30 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -20,11 +20,13 @@
*/
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
+#include <linux/security.h>
+#include <linux/posix_acl_xattr.h>
#include "f2fs.h"
#include "xattr.h"
static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
int total_len, prefix_len = 0;
@@ -43,15 +45,19 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
prefix = XATTR_TRUSTED_PREFIX;
prefix_len = XATTR_TRUSTED_PREFIX_LEN;
break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ break;
default:
return -EINVAL;
}
- total_len = prefix_len + name_len + 1;
+ total_len = prefix_len + len + 1;
if (list && total_len <= list_size) {
memcpy(list, prefix, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
+ memcpy(list + prefix_len, name, len);
+ list[prefix_len + len] = '\0';
}
return total_len;
}
@@ -70,13 +76,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ break;
default:
return -EINVAL;
}
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_getxattr(dentry->d_inode, type, name,
- buffer, size);
+ return f2fs_getxattr(dentry->d_inode, type, name, buffer, size);
}
static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -93,17 +100,20 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ break;
default:
return -EINVAL;
}
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_setxattr(dentry->d_inode, type, name, value, size);
+ return f2fs_setxattr(dentry->d_inode, type, name,
+ value, size, NULL, flags);
}
static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
size_t size;
@@ -145,6 +155,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
return 0;
}
+#ifdef CONFIG_F2FS_FS_SECURITY
+static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *page)
+{
+ const struct xattr *xattr;
+ int err = 0;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+ xattr->name, xattr->value,
+ xattr->value_len, (struct page *)page, 0);
+ if (err < 0)
+ break;
+ }
+ return err;
+}
+
+int f2fs_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr, struct page *ipage)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &f2fs_initxattrs, ipage);
+}
+#endif
+
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
@@ -169,106 +204,253 @@ const struct xattr_handler f2fs_xattr_advise_handler = {
.set = f2fs_xattr_advise_set,
};
+const struct xattr_handler f2fs_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = F2FS_XATTR_INDEX_SECURITY,
+ .list = f2fs_xattr_generic_list,
+ .get = f2fs_xattr_generic_get,
+ .set = f2fs_xattr_generic_set,
+};
+
static const struct xattr_handler *f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
- [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
+ [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
+ [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
+#ifdef CONFIG_F2FS_FS_SECURITY
+ [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler,
+#endif
[F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
};
const struct xattr_handler *f2fs_xattr_handlers[] = {
&f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- &f2fs_xattr_acl_access_handler,
- &f2fs_xattr_acl_default_handler,
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
#endif
&f2fs_xattr_trusted_handler,
+#ifdef CONFIG_F2FS_FS_SECURITY
+ &f2fs_xattr_security_handler,
+#endif
&f2fs_xattr_advise_handler,
NULL,
};
-static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
+static inline const struct xattr_handler *f2fs_xattr_handler(int index)
{
const struct xattr_handler *handler = NULL;
- if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
- handler = f2fs_xattr_handler_map[name_index];
+ if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
+ handler = f2fs_xattr_handler_map[index];
return handler;
}
-int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
+ size_t len, const char *name)
+{
+ struct f2fs_xattr_entry *entry;
+
+ list_for_each_xattr(entry, base_addr) {
+ if (entry->e_name_index != index)
+ continue;
+ if (entry->e_name_len != len)
+ continue;
+ if (!memcmp(entry->e_name, name, len))
+ break;
+ }
+ return entry;
+}
+
+static void *read_all_xattrs(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_xattr_header *header;
+ size_t size = PAGE_SIZE, inline_size = 0;
+ void *txattr_addr;
+
+ inline_size = inline_xattr_size(inode);
+
+ txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
+ if (!txattr_addr)
+ return NULL;
+
+ /* read from inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ goto fail;
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+ }
+
+ /* read from xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage))
+ goto fail;
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
+ f2fs_put_page(xpage, 1);
+ }
+
+ header = XATTR_HDR(txattr_addr);
+
+ /* never been allocated xattrs */
+ if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
+ header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
+ header->h_refcount = cpu_to_le32(1);
+ }
+ return txattr_addr;
+fail:
+ kzfree(txattr_addr);
+ return NULL;
+}
+
+static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
+ void *txattr_addr, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ size_t inline_size = 0;
+ void *xattr_addr;
+ struct page *xpage;
+ nid_t new_nid = 0;
+ int err;
+
+ inline_size = inline_xattr_size(inode);
+
+ if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
+ if (!alloc_nid(sbi, &new_nid))
+ return -ENOSPC;
+
+ /* write to inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(page);
+ }
+ inline_addr = inline_xattr_addr(page);
+ f2fs_wait_on_page_writeback(page, NODE);
+ }
+ memcpy(inline_addr, txattr_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ /* no need to use xattr node block */
+ if (hsize <= inline_size) {
+ err = truncate_xattr_node(inode, ipage);
+ alloc_nid_failed(sbi, new_nid);
+ return err;
+ }
+ }
+
+ /* write to xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ f2fs_bug_on(new_nid);
+ f2fs_wait_on_page_writeback(xpage, NODE);
+ } else {
+ struct dnode_of_data dn;
+ set_new_dnode(&dn, inode, NULL, NULL, new_nid);
+ xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ alloc_nid_done(sbi, new_nid);
+ }
+
+ xattr_addr = page_address(xpage);
+ memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
+ sizeof(struct node_footer));
+ set_page_dirty(xpage);
+ f2fs_put_page(xpage, 1);
+
+ /* need to checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
+ return 0;
+}
+
+int f2fs_getxattr(struct inode *inode, int index, const char *name,
+ void *buffer, size_t buffer_size)
+{
struct f2fs_xattr_entry *entry;
- struct page *page;
void *base_addr;
- int error = 0, found = 0;
- size_t value_len, name_len;
+ int error = 0;
+ size_t size, len;
if (name == NULL)
return -EINVAL;
- name_len = strlen(name);
- if (!fi->i_xattr_nid)
- return -ENODATA;
+ len = strlen(name);
+ if (len > F2FS_NAME_LEN)
+ return -ERANGE;
- page = get_node_page(sbi, fi->i_xattr_nid);
- base_addr = page_address(page);
+ base_addr = read_all_xattrs(inode, NULL);
+ if (!base_addr)
+ return -ENOMEM;
- list_for_each_xattr(entry, base_addr) {
- if (entry->e_name_index != name_index)
- continue;
- if (entry->e_name_len != name_len)
- continue;
- if (!memcmp(entry->e_name, name, name_len)) {
- found = 1;
- break;
- }
- }
- if (!found) {
+ entry = __find_xattr(base_addr, index, len, name);
+ if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
- value_len = le16_to_cpu(entry->e_value_size);
+ size = le16_to_cpu(entry->e_value_size);
- if (buffer && value_len > buffer_size) {
+ if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
}
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
- memcpy(buffer, pval, value_len);
+ memcpy(buffer, pval, size);
}
- error = value_len;
+ error = size;
cleanup:
- f2fs_put_page(page, 1);
+ kzfree(base_addr);
return error;
}
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = dentry->d_inode;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *entry;
- struct page *page;
void *base_addr;
int error = 0;
size_t rest = buffer_size;
- if (!fi->i_xattr_nid)
- return 0;
-
- page = get_node_page(sbi, fi->i_xattr_nid);
- base_addr = page_address(page);
+ base_addr = read_all_xattrs(inode, NULL);
+ if (!base_addr)
+ return -ENOMEM;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
@@ -291,116 +473,77 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- f2fs_put_page(page, 1);
+ kzfree(base_addr);
return error;
}
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len)
+static int __f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size,
+ struct page *ipage, int flags)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_xattr_header *header = NULL;
struct f2fs_xattr_entry *here, *last;
- struct page *page;
void *base_addr;
- int error, found, free, newsize;
- size_t name_len;
- char *pval;
+ int found, newsize;
+ size_t len;
+ __u32 new_hsize;
+ int error = -ENOMEM;
if (name == NULL)
return -EINVAL;
- name_len = strlen(name);
if (value == NULL)
- value_len = 0;
+ size = 0;
- if (name_len > 255 || value_len > MAX_VALUE_LEN)
- return -ERANGE;
+ len = strlen(name);
- f2fs_balance_fs(sbi);
-
- mutex_lock_op(sbi, NODE_NEW);
- if (!fi->i_xattr_nid) {
- /* Allocate new attribute block */
- struct dnode_of_data dn;
-
- if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
- mutex_unlock_op(sbi, NODE_NEW);
- return -ENOSPC;
- }
- set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
- mark_inode_dirty(inode);
-
- page = new_node_page(&dn, XATTR_NODE_OFFSET);
- if (IS_ERR(page)) {
- alloc_nid_failed(sbi, fi->i_xattr_nid);
- fi->i_xattr_nid = 0;
- mutex_unlock_op(sbi, NODE_NEW);
- return PTR_ERR(page);
- }
+ if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
+ return -ERANGE;
- alloc_nid_done(sbi, fi->i_xattr_nid);
- base_addr = page_address(page);
- header = XATTR_HDR(base_addr);
- header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
- header->h_refcount = cpu_to_le32(1);
- } else {
- /* The inode already has an extended attribute block. */
- page = get_node_page(sbi, fi->i_xattr_nid);
- if (IS_ERR(page)) {
- mutex_unlock_op(sbi, NODE_NEW);
- return PTR_ERR(page);
- }
+ base_addr = read_all_xattrs(inode, ipage);
+ if (!base_addr)
+ goto exit;
- base_addr = page_address(page);
- header = XATTR_HDR(base_addr);
- }
+ /* find entry with wanted name. */
+ here = __find_xattr(base_addr, index, len, name);
- if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
- error = -EIO;
- goto cleanup;
- }
+ found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
- /* find entry with wanted name. */
- found = 0;
- list_for_each_xattr(here, base_addr) {
- if (here->e_name_index != name_index)
- continue;
- if (here->e_name_len != name_len)
- continue;
- if (!memcmp(here->e_name, name, name_len)) {
- found = 1;
- break;
- }
+ if ((flags & XATTR_REPLACE) && !found) {
+ error = -ENODATA;
+ goto exit;
+ } else if ((flags & XATTR_CREATE) && found) {
+ error = -EEXIST;
+ goto exit;
}
last = here;
-
while (!IS_XATTR_LAST_ENTRY(last))
last = XATTR_NEXT_ENTRY(last);
- newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
- name_len + value_len);
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
/* 1. Check space */
if (value) {
- /* If value is NULL, it is remove operation.
+ int free;
+ /*
+ * If value is NULL, it is remove operation.
* In case of update operation, we caculate free.
*/
- free = MIN_OFFSET - ((char *)last - (char *)header);
+ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
- free = free - ENTRY_SIZE(here);
+ free = free + ENTRY_SIZE(here);
- if (free < newsize) {
+ if (unlikely(free < newsize)) {
error = -ENOSPC;
- goto cleanup;
+ goto exit;
}
}
/* 2. Remove old entry */
if (found) {
- /* If entry is found, remove old entry.
+ /*
+ * If entry is found, remove old entry.
* If not found, remove operation is not needed.
*/
struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
@@ -411,33 +554,63 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
memset(last, 0, oldsize);
}
+ new_hsize = (char *)last - (char *)base_addr;
+
/* 3. Write new entry */
if (value) {
- /* Before we come here, old entry is removed.
- * We just write new entry. */
+ char *pval;
+ /*
+ * Before we come here, old entry is removed.
+ * We just write new entry.
+ */
memset(last, 0, newsize);
- last->e_name_index = name_index;
- last->e_name_len = name_len;
- memcpy(last->e_name, name, name_len);
- pval = last->e_name + name_len;
- memcpy(pval, value, value_len);
- last->e_value_size = cpu_to_le16(value_len);
+ last->e_name_index = index;
+ last->e_name_len = len;
+ memcpy(last->e_name, name, len);
+ pval = last->e_name + len;
+ memcpy(pval, value, size);
+ last->e_value_size = cpu_to_le16(size);
+ new_hsize += newsize;
}
- set_page_dirty(page);
- f2fs_put_page(page, 1);
+ error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
+ if (error)
+ goto exit;
if (is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
inode->i_ctime = CURRENT_TIME;
clear_inode_flag(fi, FI_ACL_MODE);
}
- f2fs_write_inode(inode, NULL);
- mutex_unlock_op(sbi, NODE_NEW);
- return 0;
-cleanup:
- f2fs_put_page(page, 1);
- mutex_unlock_op(sbi, NODE_NEW);
+ if (ipage)
+ update_inode(inode, ipage);
+ else
+ update_inode_page(inode);
+exit:
+ kzfree(base_addr);
return error;
}
+
+int f2fs_setxattr(struct inode *inode, int index, const char *name,
+ const void *value, size_t size,
+ struct page *ipage, int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ int err;
+
+ /* this case is only from init_inode_metadata */
+ if (ipage)
+ return __f2fs_setxattr(inode, index, name, value,
+ size, ipage, flags);
+ f2fs_balance_fs(sbi);
+
+ f2fs_lock_op(sbi);
+ /* protect xattr_ver */
+ down_write(&F2FS_I(inode)->i_sem);
+ err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
+ up_write(&F2FS_I(inode)->i_sem);
+ f2fs_unlock_op(sbi);
+
+ return err;
+}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 49c9558305e..34ab7dbcf5e 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -51,7 +51,7 @@ struct f2fs_xattr_entry {
#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr))
-#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1))
+#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
#define XATTR_ROUND (3)
#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
@@ -69,17 +69,16 @@ struct f2fs_xattr_entry {
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
+ sizeof(struct node_footer) - sizeof(__u32))
-#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \
- sizeof(struct node_footer) - \
- sizeof(__u32))
-
-#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \
- sizeof(struct f2fs_xattr_entry))
+#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
+ sizeof(struct f2fs_xattr_header) - \
+ sizeof(struct f2fs_xattr_entry))
/*
* On-disk structure of f2fs_xattr
- * We use only 1 block for xattr.
+ * We use inline xattrs space + 1 block for xattr.
*
* +--------------------+
* | f2fs_xattr_header |
@@ -109,28 +108,24 @@ struct f2fs_xattr_entry {
#ifdef CONFIG_F2FS_FS_XATTR
extern const struct xattr_handler f2fs_xattr_user_handler;
extern const struct xattr_handler f2fs_xattr_trusted_handler;
-extern const struct xattr_handler f2fs_xattr_acl_access_handler;
-extern const struct xattr_handler f2fs_xattr_acl_default_handler;
extern const struct xattr_handler f2fs_xattr_advise_handler;
+extern const struct xattr_handler f2fs_xattr_security_handler;
extern const struct xattr_handler *f2fs_xattr_handlers[];
-extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len);
-extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size);
-extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
- size_t buffer_size);
-
+extern int f2fs_setxattr(struct inode *, int, const char *,
+ const void *, size_t, struct page *, int);
+extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
+extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#else
#define f2fs_xattr_handlers NULL
-static inline int f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len)
+static inline int f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size, int flags)
{
return -EOPNOTSUPP;
}
-static inline int f2fs_getxattr(struct inode *inode, int name_index,
+static inline int f2fs_getxattr(struct inode *inode, int index,
const char *name, void *buffer, size_t buffer_size)
{
return -EOPNOTSUPP;
@@ -142,4 +137,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
}
#endif
+#ifdef CONFIG_F2FS_FS_SECURITY
+extern int f2fs_init_security(struct inode *, struct inode *,
+ const struct qstr *, struct page *);
+#else
+static inline int f2fs_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr, struct page *ipage)
+{
+ return 0;
+}
+#endif
#endif /* __F2FS_XATTR_H__ */