diff options
Diffstat (limited to 'fs/f2fs')
| -rw-r--r-- | fs/f2fs/Kconfig | 8 | ||||
| -rw-r--r-- | fs/f2fs/Makefile | 2 | ||||
| -rw-r--r-- | fs/f2fs/acl.c | 182 | ||||
| -rw-r--r-- | fs/f2fs/acl.h | 14 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 519 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 750 | ||||
| -rw-r--r-- | fs/f2fs/debug.c | 63 | ||||
| -rw-r--r-- | fs/f2fs/dir.c | 146 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 448 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 301 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 69 | ||||
| -rw-r--r-- | fs/f2fs/gc.h | 2 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 250 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 121 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 80 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 712 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 44 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 164 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 846 | ||||
| -rw-r--r-- | fs/f2fs/segment.h | 190 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 326 | ||||
| -rw-r--r-- | fs/f2fs/xattr.c | 140 | ||||
| -rw-r--r-- | fs/f2fs/xattr.h | 10 | 
23 files changed, 3551 insertions, 1836 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index e06e0995e00..214fe1054fc 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -63,3 +63,11 @@ config F2FS_FS_SECURITY  	  the extended attribute support in advance.  	  If you are not using a security module, say N. + +config F2FS_CHECK_FS +	bool "F2FS consistency checking feature" +	depends on F2FS_FS +	help +	  Enables BUG_ONs which check the file system consistency in runtime. + +	  If you want to improve the performance, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 27a0820340b..2e35da12d29 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -1,6 +1,6 @@  obj-$(CONFIG_F2FS_FS) += f2fs.o -f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o +f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o inline.o  f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o  f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o  f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index b7826ec1b47..dbe2141d10a 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -17,9 +17,6 @@  #include "xattr.h"  #include "acl.h" -#define get_inode_mode(i)	((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ -					(F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) -  static inline size_t f2fs_acl_size(int count)  {  	if (count <= 4) { @@ -167,25 +164,17 @@ fail:  struct posix_acl *f2fs_get_acl(struct inode *inode, int type)  { -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;  	void *value = NULL;  	struct posix_acl *acl;  	int retval; -	if (!test_opt(sbi, POSIX_ACL)) -		return NULL; - -	acl = get_cached_acl(inode, type); -	if (acl != ACL_NOT_CACHED) -		return acl; -  	if (type == ACL_TYPE_ACCESS)  		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;  	retval = f2fs_getxattr(inode, name_index, "", NULL, 0);  	if (retval > 0) { -		value = kmalloc(retval, GFP_KERNEL); +		value = kmalloc(retval, GFP_F2FS_ZERO);  		if (!value)  			return ERR_PTR(-ENOMEM);  		retval = f2fs_getxattr(inode, name_index, "", value, retval); @@ -205,19 +194,20 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)  	return acl;  } -static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +static int __f2fs_set_acl(struct inode *inode, int type, +			struct posix_acl *acl, struct page *ipage)  { -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct f2fs_inode_info *fi = F2FS_I(inode);  	int name_index;  	void *value = NULL;  	size_t size = 0;  	int error; -	if (!test_opt(sbi, POSIX_ACL)) -		return 0; -	if (S_ISLNK(inode->i_mode)) -		return -EOPNOTSUPP; +	if (acl) { +		error = posix_acl_valid(acl); +		if (error < 0) +			return error; +	}  	switch (type) {  	case ACL_TYPE_ACCESS: @@ -250,7 +240,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)  		}  	} -	error = f2fs_setxattr(inode, name_index, "", value, size, NULL); +	error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);  	kfree(value);  	if (!error) @@ -260,153 +250,31 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)  	return error;  } -int f2fs_init_acl(struct inode *inode, struct inode *dir) +int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)  { -	struct posix_acl *acl = NULL; -	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); -	int error = 0; - -	if (!S_ISLNK(inode->i_mode)) { -		if (test_opt(sbi, POSIX_ACL)) { -			acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); -			if (IS_ERR(acl)) -				return PTR_ERR(acl); -		} -		if (!acl) -			inode->i_mode &= ~current_umask(); -	} - -	if (test_opt(sbi, POSIX_ACL) && acl) { - -		if (S_ISDIR(inode->i_mode)) { -			error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); -			if (error) -				goto cleanup; -		} -		error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); -		if (error < 0) -			return error; -		if (error > 0) -			error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); -	} -cleanup: -	posix_acl_release(acl); -	return error; +	return __f2fs_set_acl(inode, type, acl, NULL);  } -int f2fs_acl_chmod(struct inode *inode) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)  { -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	struct posix_acl *acl; -	int error; -	umode_t mode = get_inode_mode(inode); - -	if (!test_opt(sbi, POSIX_ACL)) -		return 0; -	if (S_ISLNK(mode)) -		return -EOPNOTSUPP; - -	acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); -	if (IS_ERR(acl) || !acl) -		return PTR_ERR(acl); +	struct posix_acl *default_acl, *acl; +	int error = 0; -	error = posix_acl_chmod(&acl, GFP_KERNEL, mode); +	error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);  	if (error)  		return error; -	error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); -	posix_acl_release(acl); -	return error; -} - -static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, -		size_t list_size, const char *name, size_t name_len, int type) -{ -	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); -	const char *xname = POSIX_ACL_XATTR_DEFAULT; -	size_t size; -	if (!test_opt(sbi, POSIX_ACL)) -		return 0; - -	if (type == ACL_TYPE_ACCESS) -		xname = POSIX_ACL_XATTR_ACCESS; - -	size = strlen(xname) + 1; -	if (list && size <= list_size) -		memcpy(list, xname, size); -	return size; -} - -static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, -		void *buffer, size_t size, int type) -{ -	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); -	struct posix_acl *acl; -	int error; - -	if (strcmp(name, "") != 0) -		return -EINVAL; -	if (!test_opt(sbi, POSIX_ACL)) -		return -EOPNOTSUPP; - -	acl = f2fs_get_acl(dentry->d_inode, type); -	if (IS_ERR(acl)) -		return PTR_ERR(acl); -	if (!acl) -		return -ENODATA; -	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); -	posix_acl_release(acl); - -	return error; -} - -static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, -		const void *value, size_t size, int flags, int type) -{ -	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); -	struct inode *inode = dentry->d_inode; -	struct posix_acl *acl = NULL; -	int error; - -	if (strcmp(name, "") != 0) -		return -EINVAL; -	if (!test_opt(sbi, POSIX_ACL)) -		return -EOPNOTSUPP; -	if (!inode_owner_or_capable(inode)) -		return -EPERM; - -	if (value) { -		acl = posix_acl_from_xattr(&init_user_ns, value, size); -		if (IS_ERR(acl)) -			return PTR_ERR(acl); -		if (acl) { -			error = posix_acl_valid(acl); -			if (error) -				goto release_and_out; -		} -	} else { -		acl = NULL; +	if (default_acl) { +		error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, +				       ipage); +		posix_acl_release(default_acl); +	} +	if (acl) { +		if (error) +			error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, +					       ipage); +		posix_acl_release(acl);  	} -	error = f2fs_set_acl(inode, type, acl); - -release_and_out: -	posix_acl_release(acl);  	return error;  } - -const struct xattr_handler f2fs_xattr_acl_default_handler = { -	.prefix = POSIX_ACL_XATTR_DEFAULT, -	.flags = ACL_TYPE_DEFAULT, -	.list = f2fs_xattr_list_acl, -	.get = f2fs_xattr_get_acl, -	.set = f2fs_xattr_set_acl, -}; - -const struct xattr_handler f2fs_xattr_acl_access_handler = { -	.prefix = POSIX_ACL_XATTR_ACCESS, -	.flags = ACL_TYPE_ACCESS, -	.list = f2fs_xattr_list_acl, -	.get = f2fs_xattr_get_acl, -	.set = f2fs_xattr_set_acl, -}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 80f43067441..e0864651cdc 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -36,20 +36,16 @@ struct f2fs_acl_header {  #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); -extern int f2fs_acl_chmod(struct inode *inode); -extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +extern struct posix_acl *f2fs_get_acl(struct inode *, int); +extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);  #else  #define f2fs_check_acl	NULL  #define f2fs_get_acl	NULL  #define f2fs_set_acl	NULL -static inline int f2fs_acl_chmod(struct inode *inode) -{ -	return 0; -} - -static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, +							struct page *page)  {  	return 0;  } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bb312201ca9..0b4710c1d37 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;   */  struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)  { -	struct address_space *mapping = sbi->meta_inode->i_mapping; +	struct address_space *mapping = META_MAPPING(sbi);  	struct page *page = NULL;  repeat:  	page = grab_cache_page(mapping, index); @@ -38,9 +38,7 @@ repeat:  		cond_resched();  		goto repeat;  	} - -	/* We wait writeback only inside grab_meta_page() */ -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, META);  	SetPageUptodate(page);  	return page;  } @@ -50,7 +48,7 @@ repeat:   */  struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)  { -	struct address_space *mapping = sbi->meta_inode->i_mapping; +	struct address_space *mapping = META_MAPPING(sbi);  	struct page *page;  repeat:  	page = grab_cache_page(mapping, index); @@ -61,67 +59,154 @@ repeat:  	if (PageUptodate(page))  		goto out; -	if (f2fs_readpage(sbi, page, index, READ_SYNC)) +	if (f2fs_submit_page_bio(sbi, page, index, +				READ_SYNC | REQ_META | REQ_PRIO))  		goto repeat;  	lock_page(page); -	if (page->mapping != mapping) { +	if (unlikely(page->mapping != mapping)) {  		f2fs_put_page(page, 1);  		goto repeat;  	}  out: -	mark_page_accessed(page);  	return page;  } +static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +{ +	switch (type) { +	case META_NAT: +		return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; +	case META_SIT: +		return SIT_BLK_CNT(sbi); +	case META_SSA: +	case META_CP: +		return 0; +	default: +		BUG(); +	} +} + +/* + * Readahead CP/NAT/SIT/SSA pages + */ +int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) +{ +	block_t prev_blk_addr = 0; +	struct page *page; +	int blkno = start; +	int max_blks = get_max_meta_blks(sbi, type); + +	struct f2fs_io_info fio = { +		.type = META, +		.rw = READ_SYNC | REQ_META | REQ_PRIO +	}; + +	for (; nrpages-- > 0; blkno++) { +		block_t blk_addr; + +		switch (type) { +		case META_NAT: +			/* get nat block addr */ +			if (unlikely(blkno >= max_blks)) +				blkno = 0; +			blk_addr = current_nat_addr(sbi, +					blkno * NAT_ENTRY_PER_BLOCK); +			break; +		case META_SIT: +			/* get sit block addr */ +			if (unlikely(blkno >= max_blks)) +				goto out; +			blk_addr = current_sit_addr(sbi, +					blkno * SIT_ENTRY_PER_BLOCK); +			if (blkno != start && prev_blk_addr + 1 != blk_addr) +				goto out; +			prev_blk_addr = blk_addr; +			break; +		case META_SSA: +		case META_CP: +			/* get ssa/cp block addr */ +			blk_addr = blkno; +			break; +		default: +			BUG(); +		} + +		page = grab_cache_page(META_MAPPING(sbi), blk_addr); +		if (!page) +			continue; +		if (PageUptodate(page)) { +			f2fs_put_page(page, 1); +			continue; +		} + +		f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); +		f2fs_put_page(page, 0); +	} +out: +	f2fs_submit_merged_bio(sbi, META, READ); +	return blkno - start; +} +  static int f2fs_write_meta_page(struct page *page,  				struct writeback_control *wbc)  {  	struct inode *inode = page->mapping->host;  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	/* Should not write any meta pages, if any IO error was occurred */ -	if (wbc->for_reclaim || -			is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { -		dec_page_count(sbi, F2FS_DIRTY_META); -		wbc->pages_skipped++; -		set_page_dirty(page); -		return AOP_WRITEPAGE_ACTIVATE; -	} +	trace_f2fs_writepage(page, META); -	wait_on_page_writeback(page); +	if (unlikely(sbi->por_doing)) +		goto redirty_out; +	if (wbc->for_reclaim) +		goto redirty_out; +	/* Should not write any meta pages, if any IO error was occurred */ +	if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) +		goto no_write; + +	f2fs_wait_on_page_writeback(page, META);  	write_meta_page(sbi, page); +no_write:  	dec_page_count(sbi, F2FS_DIRTY_META);  	unlock_page(page);  	return 0; + +redirty_out: +	redirty_page_for_writepage(wbc, page); +	return AOP_WRITEPAGE_ACTIVATE;  }  static int f2fs_write_meta_pages(struct address_space *mapping,  				struct writeback_control *wbc)  {  	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); -	struct block_device *bdev = sbi->sb->s_bdev; -	long written; +	long diff, written; -	if (wbc->for_kupdate) -		return 0; +	trace_f2fs_writepages(mapping->host, wbc, META); -	if (get_pages(sbi, F2FS_DIRTY_META) == 0) -		return 0; +	/* collect a number of dirty meta pages and write together */ +	if (wbc->for_kupdate || +		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) +		goto skip_write;  	/* if mounting is failed, skip writing node pages */  	mutex_lock(&sbi->cp_mutex); -	written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); +	diff = nr_pages_to_write(sbi, META, wbc); +	written = sync_meta_pages(sbi, META, wbc->nr_to_write);  	mutex_unlock(&sbi->cp_mutex); -	wbc->nr_to_write -= written; +	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); +	return 0; + +skip_write: +	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);  	return 0;  }  long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,  						long nr_to_write)  { -	struct address_space *mapping = sbi->meta_inode->i_mapping; +	struct address_space *mapping = META_MAPPING(sbi);  	pgoff_t index = 0, end = LONG_MAX;  	struct pagevec pvec;  	long nwritten = 0; @@ -136,20 +221,33 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,  		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,  				PAGECACHE_TAG_DIRTY,  				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); -		if (nr_pages == 0) +		if (unlikely(nr_pages == 0))  			break;  		for (i = 0; i < nr_pages; i++) {  			struct page *page = pvec.pages[i]; +  			lock_page(page); -			BUG_ON(page->mapping != mapping); -			BUG_ON(!PageDirty(page)); -			clear_page_dirty_for_io(page); + +			if (unlikely(page->mapping != mapping)) { +continue_unlock: +				unlock_page(page); +				continue; +			} +			if (!PageDirty(page)) { +				/* someone wrote it for us */ +				goto continue_unlock; +			} + +			if (!clear_page_dirty_for_io(page)) +				goto continue_unlock; +  			if (f2fs_write_meta_page(page, &wbc)) {  				unlock_page(page);  				break;  			} -			if (nwritten++ >= nr_to_write) +			nwritten++; +			if (unlikely(nwritten >= nr_to_write))  				break;  		}  		pagevec_release(&pvec); @@ -157,7 +255,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,  	}  	if (nwritten) -		f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); +		f2fs_submit_merged_bio(sbi, type, WRITE);  	return nwritten;  } @@ -167,6 +265,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)  	struct address_space *mapping = page->mapping;  	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); +	trace_f2fs_set_page_dirty(page, META); +  	SetPageUptodate(page);  	if (!PageDirty(page)) {  		__set_page_dirty_nobuffers(page); @@ -184,62 +284,50 @@ const struct address_space_operations f2fs_meta_aops = {  int acquire_orphan_inode(struct f2fs_sb_info *sbi)  { -	unsigned int max_orphans;  	int err = 0; -	/* -	 * considering 512 blocks in a segment 5 blocks are needed for cp -	 * and log segment summaries. Remaining blocks are used to keep -	 * orphan entries with the limitation one reserved segment -	 * for cp pack we can have max 1020*507 orphan entries -	 */ -	max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; -	mutex_lock(&sbi->orphan_inode_mutex); -	if (sbi->n_orphans >= max_orphans) +	spin_lock(&sbi->orphan_inode_lock); +	if (unlikely(sbi->n_orphans >= sbi->max_orphans))  		err = -ENOSPC;  	else  		sbi->n_orphans++; -	mutex_unlock(&sbi->orphan_inode_mutex); +	spin_unlock(&sbi->orphan_inode_lock); +  	return err;  }  void release_orphan_inode(struct f2fs_sb_info *sbi)  { -	mutex_lock(&sbi->orphan_inode_mutex); +	spin_lock(&sbi->orphan_inode_lock); +	f2fs_bug_on(sbi->n_orphans == 0);  	sbi->n_orphans--; -	mutex_unlock(&sbi->orphan_inode_mutex); +	spin_unlock(&sbi->orphan_inode_lock);  }  void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)  { -	struct list_head *head, *this; -	struct orphan_inode_entry *new = NULL, *orphan = NULL; +	struct list_head *head; +	struct orphan_inode_entry *new, *orphan; -	mutex_lock(&sbi->orphan_inode_mutex); +	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); +	new->ino = ino; + +	spin_lock(&sbi->orphan_inode_lock);  	head = &sbi->orphan_inode_list; -	list_for_each(this, head) { -		orphan = list_entry(this, struct orphan_inode_entry, list); -		if (orphan->ino == ino) -			goto out; +	list_for_each_entry(orphan, head, list) { +		if (orphan->ino == ino) { +			spin_unlock(&sbi->orphan_inode_lock); +			kmem_cache_free(orphan_entry_slab, new); +			return; +		} +  		if (orphan->ino > ino)  			break; -		orphan = NULL; -	} -retry: -	new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); -	if (!new) { -		cond_resched(); -		goto retry;  	} -	new->ino = ino; -	/* add new_oentry into list which is sorted by inode number */ -	if (orphan) -		list_add(&new->list, this->prev); -	else -		list_add_tail(&new->list, head); -out: -	mutex_unlock(&sbi->orphan_inode_mutex); +	/* add new orphan entry into list which is sorted by inode number */ +	list_add_tail(&new->list, &orphan->list); +	spin_unlock(&sbi->orphan_inode_lock);  }  void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -247,40 +335,46 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)  	struct list_head *head;  	struct orphan_inode_entry *orphan; -	mutex_lock(&sbi->orphan_inode_mutex); +	spin_lock(&sbi->orphan_inode_lock);  	head = &sbi->orphan_inode_list;  	list_for_each_entry(orphan, head, list) {  		if (orphan->ino == ino) {  			list_del(&orphan->list); -			kmem_cache_free(orphan_entry_slab, orphan); +			f2fs_bug_on(sbi->n_orphans == 0);  			sbi->n_orphans--; -			break; +			spin_unlock(&sbi->orphan_inode_lock); +			kmem_cache_free(orphan_entry_slab, orphan); +			return;  		}  	} -	mutex_unlock(&sbi->orphan_inode_mutex); +	spin_unlock(&sbi->orphan_inode_lock);  }  static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)  {  	struct inode *inode = f2fs_iget(sbi->sb, ino); -	BUG_ON(IS_ERR(inode)); +	f2fs_bug_on(IS_ERR(inode));  	clear_nlink(inode);  	/* truncate all the data during iput */  	iput(inode);  } -int recover_orphan_inodes(struct f2fs_sb_info *sbi) +void recover_orphan_inodes(struct f2fs_sb_info *sbi)  {  	block_t start_blk, orphan_blkaddr, i, j;  	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) -		return 0; +		return; -	sbi->por_doing = 1; -	start_blk = __start_cp_addr(sbi) + 1; +	sbi->por_doing = true; + +	start_blk = __start_cp_addr(sbi) + 1 + +		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);  	orphan_blkaddr = __start_sum_addr(sbi) - 1; +	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); +  	for (i = 0; i < orphan_blkaddr; i++) {  		struct page *page = get_meta_page(sbi, start_blk + i);  		struct f2fs_orphan_block *orphan_blk; @@ -294,30 +388,40 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)  	}  	/* clear Orphan Flag */  	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); -	sbi->por_doing = 0; -	return 0; +	sbi->por_doing = false; +	return;  }  static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)  { -	struct list_head *head, *this, *next; +	struct list_head *head;  	struct f2fs_orphan_block *orphan_blk = NULL; -	struct page *page = NULL;  	unsigned int nentries = 0; -	unsigned short index = 1; -	unsigned short orphan_blocks; - -	orphan_blocks = (unsigned short)((sbi->n_orphans + +	unsigned short index; +	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +  		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); +	struct page *page = NULL; +	struct orphan_inode_entry *orphan = NULL; -	mutex_lock(&sbi->orphan_inode_mutex); +	for (index = 0; index < orphan_blocks; index++) +		grab_meta_page(sbi, start_blk + index); + +	index = 1; +	spin_lock(&sbi->orphan_inode_lock);  	head = &sbi->orphan_inode_list;  	/* loop for each orphan inode entry and write them in Jornal block */ -	list_for_each_safe(this, next, head) { -		struct orphan_inode_entry *orphan; +	list_for_each_entry(orphan, head, list) { +		if (!page) { +			page = find_get_page(META_MAPPING(sbi), start_blk++); +			f2fs_bug_on(!page); +			orphan_blk = +				(struct f2fs_orphan_block *)page_address(page); +			memset(orphan_blk, 0, sizeof(*orphan_blk)); +			f2fs_put_page(page, 0); +		} -		orphan = list_entry(this, struct orphan_inode_entry, list); +		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);  		if (nentries == F2FS_ORPHANS_PER_BLOCK) {  			/* @@ -331,29 +435,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)  			set_page_dirty(page);  			f2fs_put_page(page, 1);  			index++; -			start_blk++;  			nentries = 0;  			page = NULL;  		} -		if (page) -			goto page_exist; +	} -		page = grab_meta_page(sbi, start_blk); -		orphan_blk = (struct f2fs_orphan_block *)page_address(page); -		memset(orphan_blk, 0, sizeof(*orphan_blk)); -page_exist: -		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); +	if (page) { +		orphan_blk->blk_addr = cpu_to_le16(index); +		orphan_blk->blk_count = cpu_to_le16(orphan_blocks); +		orphan_blk->entry_count = cpu_to_le32(nentries); +		set_page_dirty(page); +		f2fs_put_page(page, 1);  	} -	if (!page) -		goto end; - -	orphan_blk->blk_addr = cpu_to_le16(index); -	orphan_blk->blk_count = cpu_to_le16(orphan_blocks); -	orphan_blk->entry_count = cpu_to_le32(nentries); -	set_page_dirty(page); -	f2fs_put_page(page, 1); -end: -	mutex_unlock(&sbi->orphan_inode_mutex); + +	spin_unlock(&sbi->orphan_inode_lock);  }  static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -416,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)  	unsigned long blk_size = sbi->blocksize;  	unsigned long long cp1_version = 0, cp2_version = 0;  	unsigned long long cp_start_blk_no; +	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); +	block_t cp_blk_no; +	int i; -	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); +	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);  	if (!sbi->ckpt)  		return -ENOMEM;  	/* @@ -428,7 +526,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)  	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);  	/* The second checkpoint pack should start at the next segment */ -	cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); +	cp_start_blk_no += ((unsigned long long)1) << +				le32_to_cpu(fsb->log_blocks_per_seg);  	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);  	if (cp1 && cp2) { @@ -447,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)  	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);  	memcpy(sbi->ckpt, cp_block, blk_size); +	if (cp_blks <= 1) +		goto done; + +	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); +	if (cur_page == cp2) +		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + +	for (i = 1; i < cp_blks; i++) { +		void *sit_bitmap_ptr; +		unsigned char *ckpt = (unsigned char *)sbi->ckpt; + +		cur_page = get_meta_page(sbi, cp_blk_no + i); +		sit_bitmap_ptr = page_address(cur_page); +		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); +		f2fs_put_page(cur_page, 1); +	} +done:  	f2fs_put_page(cp1, 1);  	f2fs_put_page(cp2, 1);  	return 0; @@ -459,19 +575,14 @@ fail_no_cp:  static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	struct list_head *head = &sbi->dir_inode_list; -	struct list_head *this; - -	list_for_each(this, head) { -		struct dir_inode_entry *entry; -		entry = list_entry(this, struct dir_inode_entry, list); -		if (entry->inode == inode) -			return -EEXIST; -	} -	list_add_tail(&new->list, head); -#ifdef CONFIG_F2FS_STAT_FS -	sbi->n_dirty_dirs++; -#endif + +	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) +		return -EEXIST; + +	set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); +	F2FS_I(inode)->dirty_dir = new; +	list_add_tail(&new->list, &sbi->dir_inode_list); +	stat_inc_dirty_dir(sbi);  	return 0;  } @@ -479,75 +590,65 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct dir_inode_entry *new; +	int ret = 0;  	if (!S_ISDIR(inode->i_mode))  		return; -retry: -	new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); -	if (!new) { -		cond_resched(); -		goto retry; -	} + +	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);  	new->inode = inode;  	INIT_LIST_HEAD(&new->list);  	spin_lock(&sbi->dir_inode_lock); -	if (__add_dirty_inode(inode, new)) -		kmem_cache_free(inode_entry_slab, new); - -	inc_page_count(sbi, F2FS_DIRTY_DENTS); +	ret = __add_dirty_inode(inode, new);  	inode_inc_dirty_dents(inode);  	SetPagePrivate(page);  	spin_unlock(&sbi->dir_inode_lock); + +	if (ret) +		kmem_cache_free(inode_entry_slab, new);  }  void add_dirty_dir_inode(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	struct dir_inode_entry *new; -retry: -	new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); -	if (!new) { -		cond_resched(); -		goto retry; -	} +	struct dir_inode_entry *new = +			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); +	int ret = 0; +  	new->inode = inode;  	INIT_LIST_HEAD(&new->list);  	spin_lock(&sbi->dir_inode_lock); -	if (__add_dirty_inode(inode, new)) -		kmem_cache_free(inode_entry_slab, new); +	ret = __add_dirty_inode(inode, new);  	spin_unlock(&sbi->dir_inode_lock); + +	if (ret) +		kmem_cache_free(inode_entry_slab, new);  }  void remove_dirty_dir_inode(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	struct list_head *head = &sbi->dir_inode_list; -	struct list_head *this; +	struct dir_inode_entry *entry;  	if (!S_ISDIR(inode->i_mode))  		return;  	spin_lock(&sbi->dir_inode_lock); -	if (atomic_read(&F2FS_I(inode)->dirty_dents)) { +	if (get_dirty_dents(inode) || +			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {  		spin_unlock(&sbi->dir_inode_lock);  		return;  	} -	list_for_each(this, head) { -		struct dir_inode_entry *entry; -		entry = list_entry(this, struct dir_inode_entry, list); -		if (entry->inode == inode) { -			list_del(&entry->list); -			kmem_cache_free(inode_entry_slab, entry); -#ifdef CONFIG_F2FS_STAT_FS -			sbi->n_dirty_dirs--; -#endif -			break; -		} -	} +	entry = F2FS_I(inode)->dirty_dir; +	list_del(&entry->list); +	F2FS_I(inode)->dirty_dir = NULL; +	clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); +	stat_dec_dirty_dir(sbi);  	spin_unlock(&sbi->dir_inode_lock); +	kmem_cache_free(inode_entry_slab, entry);  	/* Only from the recovery routine */  	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { @@ -556,32 +657,15 @@ void remove_dirty_dir_inode(struct inode *inode)  	}  } -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ -	struct list_head *head = &sbi->dir_inode_list; -	struct list_head *this; -	struct inode *inode = NULL; - -	spin_lock(&sbi->dir_inode_lock); -	list_for_each(this, head) { -		struct dir_inode_entry *entry; -		entry = list_entry(this, struct dir_inode_entry, list); -		if (entry->inode->i_ino == ino) { -			inode = entry->inode; -			break; -		} -	} -	spin_unlock(&sbi->dir_inode_lock); -	return inode; -} -  void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)  { -	struct list_head *head = &sbi->dir_inode_list; +	struct list_head *head;  	struct dir_inode_entry *entry;  	struct inode *inode;  retry:  	spin_lock(&sbi->dir_inode_lock); + +	head = &sbi->dir_inode_list;  	if (list_empty(head)) {  		spin_unlock(&sbi->dir_inode_lock);  		return; @@ -590,14 +674,14 @@ retry:  	inode = igrab(entry->inode);  	spin_unlock(&sbi->dir_inode_lock);  	if (inode) { -		filemap_flush(inode->i_mapping); +		filemap_fdatawrite(inode->i_mapping);  		iput(inode);  	} else {  		/*  		 * We should submit bio, since it exists several  		 * wribacking dentry pages in the freeing inode.  		 */ -		f2fs_submit_bio(sbi, DATA, true); +		f2fs_submit_merged_bio(sbi, DATA, WRITE);  	}  	goto retry;  } @@ -617,11 +701,10 @@ static void block_operations(struct f2fs_sb_info *sbi)  	blk_start_plug(&plug);  retry_flush_dents: -	mutex_lock_all(sbi); - +	f2fs_lock_all(sbi);  	/* write all the dirty dentry pages */  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) { -		mutex_unlock_all(sbi); +		f2fs_unlock_all(sbi);  		sync_dirty_dir_inodes(sbi);  		goto retry_flush_dents;  	} @@ -644,7 +727,22 @@ retry_flush_nodes:  static void unblock_operations(struct f2fs_sb_info *sbi)  {  	mutex_unlock(&sbi->node_write); -	mutex_unlock_all(sbi); +	f2fs_unlock_all(sbi); +} + +static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +{ +	DEFINE_WAIT(wait); + +	for (;;) { +		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); + +		if (!get_pages(sbi, F2FS_WRITEBACK)) +			break; + +		io_schedule(); +	} +	finish_wait(&sbi->cp_wait, &wait);  }  static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) @@ -657,6 +755,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	__u32 crc32 = 0;  	void *kaddr;  	int i; +	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + +	/* +	 * This avoids to conduct wrong roll-forward operations and uses +	 * metapages, so should be called prior to sync_meta_pages below. +	 */ +	discard_next_dnode(sbi);  	/* Flush all the NAT/SIT pages */  	while (get_pages(sbi, F2FS_DIRTY_META)) @@ -701,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)  					/ F2FS_ORPHANS_PER_BLOCK; -	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); +	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + +			orphan_blocks);  	if (is_umount) {  		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);  		ckpt->cp_pack_total_block_count = cpu_to_le32(2 + -			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); +				cp_payload_blks + data_sum_blocks + +				orphan_blocks + NR_CURSEG_NODE_TYPE);  	} else {  		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);  		ckpt->cp_pack_total_block_count = cpu_to_le32(2 + -			data_sum_blocks + orphan_blocks); +				cp_payload_blks + data_sum_blocks + +				orphan_blocks);  	}  	if (sbi->n_orphans) @@ -736,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	set_page_dirty(cp_page);  	f2fs_put_page(cp_page, 1); +	for (i = 1; i < 1 + cp_payload_blks; i++) { +		cp_page = grab_meta_page(sbi, start_blk++); +		kaddr = page_address(cp_page); +		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, +				(1 << sbi->log_blocksize)); +		set_page_dirty(cp_page); +		f2fs_put_page(cp_page, 1); +	} +  	if (sbi->n_orphans) {  		write_orphan_inodes(sbi, start_blk);  		start_blk += orphan_blocks; @@ -756,11 +873,10 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	f2fs_put_page(cp_page, 1);  	/* wait for previous submitted node/meta pages writeback */ -	while (get_pages(sbi, F2FS_WRITEBACK)) -		congestion_wait(BLK_RW_ASYNC, HZ / 50); +	wait_on_all_pages_writeback(sbi); -	filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); -	filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); +	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); +	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);  	/* update user_block_counts */  	sbi->last_valid_block_count = sbi->total_valid_block_count; @@ -769,7 +885,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	/* Here, we only have one bio having CP pack */  	sync_meta_pages(sbi, META_FLUSH, LONG_MAX); -	if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { +	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {  		clear_prefree_segments(sbi);  		F2FS_RESET_SB_DIRT(sbi);  	} @@ -790,9 +906,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); -	f2fs_submit_bio(sbi, DATA, true); -	f2fs_submit_bio(sbi, NODE, true); -	f2fs_submit_bio(sbi, META, true); +	f2fs_submit_merged_bio(sbi, DATA, WRITE); +	f2fs_submit_merged_bio(sbi, NODE, WRITE); +	f2fs_submit_merged_bio(sbi, META, WRITE);  	/*  	 * update checkpoint pack index @@ -812,25 +928,34 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)  	unblock_operations(sbi);  	mutex_unlock(&sbi->cp_mutex); +	stat_inc_cp_count(sbi->stat_info);  	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");  }  void init_orphan_info(struct f2fs_sb_info *sbi)  { -	mutex_init(&sbi->orphan_inode_mutex); +	spin_lock_init(&sbi->orphan_inode_lock);  	INIT_LIST_HEAD(&sbi->orphan_inode_list);  	sbi->n_orphans = 0; +	/* +	 * considering 512 blocks in a segment 8 blocks are needed for cp +	 * and log segment summaries. Remaining blocks are used to keep +	 * orphan entries with the limitation one reserved segment +	 * for cp pack we can have max 1020*504 orphan entries +	 */ +	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) +				* F2FS_ORPHANS_PER_BLOCK;  }  int __init create_checkpoint_caches(void)  {  	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", -			sizeof(struct orphan_inode_entry), NULL); -	if (unlikely(!orphan_entry_slab)) +			sizeof(struct orphan_inode_entry)); +	if (!orphan_entry_slab)  		return -ENOMEM;  	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", -			sizeof(struct dir_inode_entry), NULL); -	if (unlikely(!inode_entry_slab)) { +			sizeof(struct dir_inode_entry)); +	if (!inode_entry_slab) {  		kmem_cache_destroy(orphan_entry_slab);  		return -ENOMEM;  	} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 941f9b9ca3a..f8cf619edb5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -24,6 +24,190 @@  #include "segment.h"  #include <trace/events/f2fs.h> +static void f2fs_read_end_io(struct bio *bio, int err) +{ +	struct bio_vec *bvec; +	int i; + +	bio_for_each_segment_all(bvec, bio, i) { +		struct page *page = bvec->bv_page; + +		if (!err) { +			SetPageUptodate(page); +		} else { +			ClearPageUptodate(page); +			SetPageError(page); +		} +		unlock_page(page); +	} +	bio_put(bio); +} + +static void f2fs_write_end_io(struct bio *bio, int err) +{ +	struct f2fs_sb_info *sbi = bio->bi_private; +	struct bio_vec *bvec; +	int i; + +	bio_for_each_segment_all(bvec, bio, i) { +		struct page *page = bvec->bv_page; + +		if (unlikely(err)) { +			SetPageError(page); +			set_bit(AS_EIO, &page->mapping->flags); +			f2fs_stop_checkpoint(sbi); +		} +		end_page_writeback(page); +		dec_page_count(sbi, F2FS_WRITEBACK); +	} + +	if (sbi->wait_io) { +		complete(sbi->wait_io); +		sbi->wait_io = NULL; +	} + +	if (!get_pages(sbi, F2FS_WRITEBACK) && +			!list_empty(&sbi->cp_wait.task_list)) +		wake_up(&sbi->cp_wait); + +	bio_put(bio); +} + +/* + * Low-level block read/write IO operations. + */ +static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, +				int npages, bool is_read) +{ +	struct bio *bio; + +	/* No failure on bio allocation */ +	bio = bio_alloc(GFP_NOIO, npages); + +	bio->bi_bdev = sbi->sb->s_bdev; +	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); +	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; +	bio->bi_private = sbi; + +	return bio; +} + +static void __submit_merged_bio(struct f2fs_bio_info *io) +{ +	struct f2fs_io_info *fio = &io->fio; +	int rw; + +	if (!io->bio) +		return; + +	rw = fio->rw; + +	if (is_read_io(rw)) { +		trace_f2fs_submit_read_bio(io->sbi->sb, rw, +						fio->type, io->bio); +		submit_bio(rw, io->bio); +	} else { +		trace_f2fs_submit_write_bio(io->sbi->sb, rw, +						fio->type, io->bio); +		/* +		 * META_FLUSH is only from the checkpoint procedure, and we +		 * should wait this metadata bio for FS consistency. +		 */ +		if (fio->type == META_FLUSH) { +			DECLARE_COMPLETION_ONSTACK(wait); +			io->sbi->wait_io = &wait; +			submit_bio(rw, io->bio); +			wait_for_completion(&wait); +		} else { +			submit_bio(rw, io->bio); +		} +	} + +	io->bio = NULL; +} + +void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, +				enum page_type type, int rw) +{ +	enum page_type btype = PAGE_TYPE_OF_BIO(type); +	struct f2fs_bio_info *io; + +	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + +	down_write(&io->io_rwsem); + +	/* change META to META_FLUSH in the checkpoint procedure */ +	if (type >= META_FLUSH) { +		io->fio.type = META_FLUSH; +		io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; +	} +	__submit_merged_bio(io); +	up_write(&io->io_rwsem); +} + +/* + * Fill the locked page with data located in the block address. + * Return unlocked page. + */ +int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, +					block_t blk_addr, int rw) +{ +	struct bio *bio; + +	trace_f2fs_submit_page_bio(page, blk_addr, rw); + +	/* Allocate a new bio */ +	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); + +	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { +		bio_put(bio); +		f2fs_put_page(page, 1); +		return -EFAULT; +	} + +	submit_bio(rw, bio); +	return 0; +} + +void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, +			block_t blk_addr, struct f2fs_io_info *fio) +{ +	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); +	struct f2fs_bio_info *io; +	bool is_read = is_read_io(fio->rw); + +	io = is_read ? &sbi->read_io : &sbi->write_io[btype]; + +	verify_block_addr(sbi, blk_addr); + +	down_write(&io->io_rwsem); + +	if (!is_read) +		inc_page_count(sbi, F2FS_WRITEBACK); + +	if (io->bio && (io->last_block_in_bio != blk_addr - 1 || +						io->fio.rw != fio->rw)) +		__submit_merged_bio(io); +alloc_new: +	if (io->bio == NULL) { +		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + +		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); +		io->fio = *fio; +	} + +	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < +							PAGE_CACHE_SIZE) { +		__submit_merged_bio(io); +		goto alloc_new; +	} + +	io->last_block_in_bio = blk_addr; + +	up_write(&io->io_rwsem); +	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); +} +  /*   * Lock ordering for the change of data block address:   * ->data_page @@ -37,7 +221,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)  	struct page *node_page = dn->node_page;  	unsigned int ofs_in_node = dn->ofs_in_node; -	f2fs_wait_on_page_writeback(node_page, NODE, false); +	f2fs_wait_on_page_writeback(node_page, NODE);  	rn = F2FS_NODE(node_page); @@ -51,38 +235,57 @@ int reserve_new_block(struct dnode_of_data *dn)  {  	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); -	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))  		return -EPERM; -	if (!inc_valid_block_count(sbi, dn->inode, 1)) +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))  		return -ENOSPC;  	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);  	__set_data_blkaddr(dn, NEW_ADDR);  	dn->data_blkaddr = NEW_ADDR; +	mark_inode_dirty(dn->inode);  	sync_inode_page(dn);  	return 0;  } +int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) +{ +	bool need_put = dn->inode_page ? false : true; +	int err; + +	/* if inode_page exists, index should be zero */ +	f2fs_bug_on(!need_put && index); + +	err = get_dnode_of_data(dn, index, ALLOC_NODE); +	if (err) +		return err; + +	if (dn->data_blkaddr == NULL_ADDR) +		err = reserve_new_block(dn); +	if (err || need_put) +		f2fs_put_dnode(dn); +	return err; +} +  static int check_extent_cache(struct inode *inode, pgoff_t pgofs,  					struct buffer_head *bh_result)  {  	struct f2fs_inode_info *fi = F2FS_I(inode); -#ifdef CONFIG_F2FS_STAT_FS -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -#endif  	pgoff_t start_fofs, end_fofs;  	block_t start_blkaddr; +	if (is_inode_flag_set(fi, FI_NO_EXTENT)) +		return 0; +  	read_lock(&fi->ext.ext_lock);  	if (fi->ext.len == 0) {  		read_unlock(&fi->ext.ext_lock);  		return 0;  	} -#ifdef CONFIG_F2FS_STAT_FS -	sbi->total_hit_ext++; -#endif +	stat_inc_total_hit(inode->i_sb); +  	start_fofs = fi->ext.fofs;  	end_fofs = fi->ext.fofs + fi->ext.len - 1;  	start_blkaddr = fi->ext.blk_addr; @@ -100,9 +303,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,  		else  			bh_result->b_size = UINT_MAX; -#ifdef CONFIG_F2FS_STAT_FS -		sbi->read_hit_ext++; -#endif +		stat_inc_read_hit(inode->i_sb);  		read_unlock(&fi->ext.ext_lock);  		return 1;  	} @@ -115,14 +316,18 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)  	struct f2fs_inode_info *fi = F2FS_I(dn->inode);  	pgoff_t fofs, start_fofs, end_fofs;  	block_t start_blkaddr, end_blkaddr; +	int need_update = true; -	BUG_ON(blk_addr == NEW_ADDR); +	f2fs_bug_on(blk_addr == NEW_ADDR);  	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +  							dn->ofs_in_node;  	/* Update the page address in the parent node */  	__set_data_blkaddr(dn, blk_addr); +	if (is_inode_flag_set(fi, FI_NO_EXTENT)) +		return; +  	write_lock(&fi->ext.ext_lock);  	start_fofs = fi->ext.fofs; @@ -169,14 +374,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)  					fofs - start_fofs + 1;  			fi->ext.len -= fofs - start_fofs + 1;  		} -		goto end_update; +	} else { +		need_update = false;  	} -	write_unlock(&fi->ext.ext_lock); -	return; +	/* Finally, if the extent is very fragmented, let's drop the cache. */ +	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { +		fi->ext.len = 0; +		set_inode_flag(fi, FI_NO_EXTENT); +		need_update = true; +	}  end_update:  	write_unlock(&fi->ext.ext_lock); -	sync_inode_page(dn); +	if (need_update) +		sync_inode_page(dn); +	return;  }  struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) @@ -202,10 +414,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)  		return ERR_PTR(-ENOENT);  	/* By fallocate(), there is no cached page, but with NEW_ADDR */ -	if (dn.data_blkaddr == NEW_ADDR) +	if (unlikely(dn.data_blkaddr == NEW_ADDR))  		return ERR_PTR(-EINVAL); -	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); +	page = grab_cache_page(mapping, index);  	if (!page)  		return ERR_PTR(-ENOMEM); @@ -214,11 +426,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)  		return page;  	} -	err = f2fs_readpage(sbi, page, dn.data_blkaddr, +	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,  					sync ? READ_SYNC : READA); +	if (err) +		return ERR_PTR(err); +  	if (sync) {  		wait_on_page_locked(page); -		if (!PageUptodate(page)) { +		if (unlikely(!PageUptodate(page))) {  			f2fs_put_page(page, 0);  			return ERR_PTR(-EIO);  		} @@ -240,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)  	int err;  repeat: -	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); +	page = grab_cache_page(mapping, index);  	if (!page)  		return ERR_PTR(-ENOMEM); @@ -252,7 +467,7 @@ repeat:  	}  	f2fs_put_dnode(&dn); -	if (dn.data_blkaddr == NULL_ADDR) { +	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {  		f2fs_put_page(page, 1);  		return ERR_PTR(-ENOENT);  	} @@ -272,16 +487,16 @@ repeat:  		return page;  	} -	err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); +	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);  	if (err)  		return ERR_PTR(err);  	lock_page(page); -	if (!PageUptodate(page)) { +	if (unlikely(!PageUptodate(page))) {  		f2fs_put_page(page, 1);  		return ERR_PTR(-EIO);  	} -	if (page->mapping != mapping) { +	if (unlikely(page->mapping != mapping)) {  		f2fs_put_page(page, 1);  		goto repeat;  	} @@ -292,12 +507,12 @@ repeat:   * Caller ensures that this data page is never allocated.   * A new zero-filled data page is allocated in the page cache.   * - * Also, caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). - * Note that, npage is set only by make_empty_dir. + * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op(). + * Note that, ipage is set only by make_empty_dir.   */  struct page *get_new_data_page(struct inode *inode, -		struct page *npage, pgoff_t index, bool new_i_size) +		struct page *ipage, pgoff_t index, bool new_i_size)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct address_space *mapping = inode->i_mapping; @@ -305,24 +520,16 @@ struct page *get_new_data_page(struct inode *inode,  	struct dnode_of_data dn;  	int err; -	set_new_dnode(&dn, inode, npage, npage, 0); -	err = get_dnode_of_data(&dn, index, ALLOC_NODE); +	set_new_dnode(&dn, inode, ipage, NULL, 0); +	err = f2fs_reserve_block(&dn, index);  	if (err)  		return ERR_PTR(err); - -	if (dn.data_blkaddr == NULL_ADDR) { -		if (reserve_new_block(&dn)) { -			if (!npage) -				f2fs_put_dnode(&dn); -			return ERR_PTR(-ENOSPC); -		} -	} -	if (!npage) -		f2fs_put_dnode(&dn);  repeat:  	page = grab_cache_page(mapping, index); -	if (!page) -		return ERR_PTR(-ENOMEM); +	if (!page) { +		err = -ENOMEM; +		goto put_err; +	}  	if (PageUptodate(page))  		return page; @@ -331,15 +538,18 @@ repeat:  		zero_user_segment(page, 0, PAGE_CACHE_SIZE);  		SetPageUptodate(page);  	} else { -		err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); +		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, +								READ_SYNC);  		if (err) -			return ERR_PTR(err); +			goto put_err; +  		lock_page(page); -		if (!PageUptodate(page)) { +		if (unlikely(!PageUptodate(page))) {  			f2fs_put_page(page, 1); -			return ERR_PTR(-EIO); +			err = -EIO; +			goto put_err;  		} -		if (page->mapping != mapping) { +		if (unlikely(page->mapping != mapping)) {  			f2fs_put_page(page, 1);  			goto repeat;  		} @@ -350,140 +560,206 @@ repeat:  		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));  		/* Only the directory inode sets new_i_size */  		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); -		mark_inode_dirty_sync(inode);  	}  	return page; -} - -static void read_end_io(struct bio *bio, int err) -{ -	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); -	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; -	do { -		struct page *page = bvec->bv_page; - -		if (--bvec >= bio->bi_io_vec) -			prefetchw(&bvec->bv_page->flags); - -		if (uptodate) { -			SetPageUptodate(page); -		} else { -			ClearPageUptodate(page); -			SetPageError(page); -		} -		unlock_page(page); -	} while (bvec >= bio->bi_io_vec); -	bio_put(bio); +put_err: +	f2fs_put_dnode(&dn); +	return ERR_PTR(err);  } -/* - * Fill the locked page with data located in the block address. - * Return unlocked page. - */ -int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, -					block_t blk_addr, int type) +static int __allocate_data_block(struct dnode_of_data *dn)  { -	struct block_device *bdev = sbi->sb->s_bdev; -	struct bio *bio; +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); +	struct f2fs_summary sum; +	block_t new_blkaddr; +	struct node_info ni; +	int type; -	trace_f2fs_readpage(page, blk_addr, type); +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) +		return -EPERM; +	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) +		return -ENOSPC; -	down_read(&sbi->bio_sem); +	__set_data_blkaddr(dn, NEW_ADDR); +	dn->data_blkaddr = NEW_ADDR; -	/* Allocate a new bio */ -	bio = f2fs_bio_alloc(bdev, 1); +	get_node_info(sbi, dn->nid, &ni); +	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); -	/* Initialize the bio */ -	bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); -	bio->bi_end_io = read_end_io; +	type = CURSEG_WARM_DATA; -	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { -		bio_put(bio); -		up_read(&sbi->bio_sem); -		f2fs_put_page(page, 1); -		return -EFAULT; -	} +	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + +	/* direct IO doesn't use extent cache to maximize the performance */ +	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); +	update_extent_cache(new_blkaddr, dn); +	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); -	submit_bio(type, bio); -	up_read(&sbi->bio_sem); +	dn->data_blkaddr = new_blkaddr;  	return 0;  }  /* - * This function should be used by the data read flow only where it - * does not check the "create" flag that indicates block allocation. - * The reason for this special functionality is to exploit VFS readahead - * mechanism. + * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. + * If original data blocks are allocated, then give them to blockdev. + * Otherwise, + *     a. preallocate requested block addresses + *     b. do not use extent cache for better performance + *     c. give the block addresses to blockdev   */ -static int get_data_block_ro(struct inode *inode, sector_t iblock, -			struct buffer_head *bh_result, int create) +static int __get_data_block(struct inode *inode, sector_t iblock, +			struct buffer_head *bh_result, int create, bool fiemap)  { +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	unsigned int blkbits = inode->i_sb->s_blocksize_bits;  	unsigned maxblocks = bh_result->b_size >> blkbits;  	struct dnode_of_data dn; -	pgoff_t pgofs; -	int err; +	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; +	pgoff_t pgofs, end_offset; +	int err = 0, ofs = 1; +	bool allocated = false;  	/* Get the page offset from the block offset(iblock) */  	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); -	if (check_extent_cache(inode, pgofs, bh_result)) { -		trace_f2fs_get_data_block(inode, iblock, bh_result, 0); -		return 0; -	} +	if (check_extent_cache(inode, pgofs, bh_result)) +		goto out; + +	if (create) +		f2fs_lock_op(sbi);  	/* When reading holes, we need its node page */  	set_new_dnode(&dn, inode, NULL, NULL, 0); -	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); +	err = get_dnode_of_data(&dn, pgofs, mode);  	if (err) { -		trace_f2fs_get_data_block(inode, iblock, bh_result, err); -		return (err == -ENOENT) ? 0 : err; +		if (err == -ENOENT) +			err = 0; +		goto unlock_out;  	} +	if (dn.data_blkaddr == NEW_ADDR && !fiemap) +		goto put_out; -	/* It does not support data allocation */ -	BUG_ON(create); +	if (dn.data_blkaddr != NULL_ADDR) { +		map_bh(bh_result, inode->i_sb, dn.data_blkaddr); +	} else if (create) { +		err = __allocate_data_block(&dn); +		if (err) +			goto put_out; +		allocated = true; +		map_bh(bh_result, inode->i_sb, dn.data_blkaddr); +	} else { +		goto put_out; +	} -	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { -		int i; -		unsigned int end_offset; +	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); +	bh_result->b_size = (((size_t)1) << blkbits); +	dn.ofs_in_node++; +	pgofs++; -		end_offset = IS_INODE(dn.node_page) ? -				ADDRS_PER_INODE(F2FS_I(inode)) : -				ADDRS_PER_BLOCK; +get_next: +	if (dn.ofs_in_node >= end_offset) { +		if (allocated) +			sync_inode_page(&dn); +		allocated = false; +		f2fs_put_dnode(&dn); -		clear_buffer_new(bh_result); +		set_new_dnode(&dn, inode, NULL, NULL, 0); +		err = get_dnode_of_data(&dn, pgofs, mode); +		if (err) { +			if (err == -ENOENT) +				err = 0; +			goto unlock_out; +		} +		if (dn.data_blkaddr == NEW_ADDR && !fiemap) +			goto put_out; + +		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); +	} +	if (maxblocks > (bh_result->b_size >> blkbits)) { +		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); +		if (blkaddr == NULL_ADDR && create) { +			err = __allocate_data_block(&dn); +			if (err) +				goto sync_out; +			allocated = true; +			blkaddr = dn.data_blkaddr; +		}  		/* Give more consecutive addresses for the read ahead */ -		for (i = 0; i < end_offset - dn.ofs_in_node; i++) -			if (((datablock_addr(dn.node_page, -							dn.ofs_in_node + i)) -				!= (dn.data_blkaddr + i)) || maxblocks == i) -				break; -		map_bh(bh_result, inode->i_sb, dn.data_blkaddr); -		bh_result->b_size = (i << blkbits); +		if (blkaddr == (bh_result->b_blocknr + ofs)) { +			ofs++; +			dn.ofs_in_node++; +			pgofs++; +			bh_result->b_size += (((size_t)1) << blkbits); +			goto get_next; +		}  	} +sync_out: +	if (allocated) +		sync_inode_page(&dn); +put_out:  	f2fs_put_dnode(&dn); -	trace_f2fs_get_data_block(inode, iblock, bh_result, 0); -	return 0; +unlock_out: +	if (create) +		f2fs_unlock_op(sbi); +out: +	trace_f2fs_get_data_block(inode, iblock, bh_result, err); +	return err; +} + +static int get_data_block(struct inode *inode, sector_t iblock, +			struct buffer_head *bh_result, int create) +{ +	return __get_data_block(inode, iblock, bh_result, create, false); +} + +static int get_data_block_fiemap(struct inode *inode, sector_t iblock, +			struct buffer_head *bh_result, int create) +{ +	return __get_data_block(inode, iblock, bh_result, create, true); +} + +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, +		u64 start, u64 len) +{ +	return generic_block_fiemap(inode, fieinfo, +				start, len, get_data_block_fiemap);  }  static int f2fs_read_data_page(struct file *file, struct page *page)  { -	return mpage_readpage(page, get_data_block_ro); +	struct inode *inode = page->mapping->host; +	int ret; + +	trace_f2fs_readpage(page, DATA); + +	/* If the file has inline data, try to read it directlly */ +	if (f2fs_has_inline_data(inode)) +		ret = f2fs_read_inline_data(inode, page); +	else +		ret = mpage_readpage(page, get_data_block); + +	return ret;  }  static int f2fs_read_data_pages(struct file *file,  			struct address_space *mapping,  			struct list_head *pages, unsigned nr_pages)  { -	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); +	struct inode *inode = file->f_mapping->host; + +	/* If the file has inline data, skip readpages */ +	if (f2fs_has_inline_data(inode)) +		return 0; + +	return mpage_readpages(mapping, pages, nr_pages, get_data_block);  } -int do_write_data_page(struct page *page) +int do_write_data_page(struct page *page, struct f2fs_io_info *fio)  {  	struct inode *inode = page->mapping->host; -	block_t old_blk_addr, new_blk_addr; +	block_t old_blkaddr, new_blkaddr;  	struct dnode_of_data dn;  	int err = 0; @@ -492,10 +768,10 @@ int do_write_data_page(struct page *page)  	if (err)  		return err; -	old_blk_addr = dn.data_blkaddr; +	old_blkaddr = dn.data_blkaddr;  	/* This page is already truncated */ -	if (old_blk_addr == NULL_ADDR) +	if (old_blkaddr == NULL_ADDR)  		goto out_writepage;  	set_page_writeback(page); @@ -504,15 +780,13 @@ int do_write_data_page(struct page *page)  	 * If current allocation needs SSR,  	 * it had better in-place writes for updated data.  	 */ -	if (unlikely(old_blk_addr != NEW_ADDR && +	if (unlikely(old_blkaddr != NEW_ADDR &&  			!is_cold_data(page) &&  			need_inplace_update(inode))) { -		rewrite_data_page(F2FS_SB(inode->i_sb), page, -						old_blk_addr); +		rewrite_data_page(page, old_blkaddr, fio);  	} else { -		write_data_page(inode, page, &dn, -				old_blk_addr, &new_blk_addr); -		update_extent_cache(new_blk_addr, &dn); +		write_data_page(page, &dn, &new_blkaddr, fio); +		update_extent_cache(new_blkaddr, &dn);  	}  out_writepage:  	f2fs_put_dnode(&dn); @@ -527,9 +801,15 @@ static int f2fs_write_data_page(struct page *page,  	loff_t i_size = i_size_read(inode);  	const pgoff_t end_index = ((unsigned long long) i_size)  							>> PAGE_CACHE_SHIFT; -	unsigned offset; +	unsigned offset = 0;  	bool need_balance_fs = false;  	int err = 0; +	struct f2fs_io_info fio = { +		.type = DATA, +		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, +	}; + +	trace_f2fs_writepage(page, DATA);  	if (page->index < end_index)  		goto write; @@ -539,55 +819,50 @@ static int f2fs_write_data_page(struct page *page,  	 * this page does not have to be written to disk.  	 */  	offset = i_size & (PAGE_CACHE_SIZE - 1); -	if ((page->index >= end_index + 1) || !offset) { -		if (S_ISDIR(inode->i_mode)) { -			dec_page_count(sbi, F2FS_DIRTY_DENTS); -			inode_dec_dirty_dents(inode); -		} +	if ((page->index >= end_index + 1) || !offset)  		goto out; -	}  	zero_user_segment(page, offset, PAGE_CACHE_SIZE);  write: -	if (sbi->por_doing) { -		err = AOP_WRITEPAGE_ACTIVATE; +	if (unlikely(sbi->por_doing))  		goto redirty_out; -	}  	/* Dentry blocks are controlled by checkpoint */  	if (S_ISDIR(inode->i_mode)) { -		dec_page_count(sbi, F2FS_DIRTY_DENTS); -		inode_dec_dirty_dents(inode); -		err = do_write_data_page(page); -	} else { -		int ilock = mutex_lock_op(sbi); -		err = do_write_data_page(page); -		mutex_unlock_op(sbi, ilock); -		need_balance_fs = true; +		err = do_write_data_page(page, &fio); +		goto done;  	} -	if (err == -ENOENT) -		goto out; -	else if (err) + +	if (!wbc->for_reclaim) +		need_balance_fs = true; +	else if (has_not_enough_free_secs(sbi, 0))  		goto redirty_out; -	if (wbc->for_reclaim) -		f2fs_submit_bio(sbi, DATA, true); +	f2fs_lock_op(sbi); +	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) +		err = f2fs_write_inline_data(inode, page, offset); +	else +		err = do_write_data_page(page, &fio); +	f2fs_unlock_op(sbi); +done: +	if (err && err != -ENOENT) +		goto redirty_out;  	clear_cold_data(page);  out: +	inode_dec_dirty_dents(inode);  	unlock_page(page);  	if (need_balance_fs)  		f2fs_balance_fs(sbi); +	if (wbc->for_reclaim) +		f2fs_submit_merged_bio(sbi, DATA, WRITE);  	return 0;  redirty_out: -	wbc->pages_skipped++; -	set_page_dirty(page); -	return err; +	redirty_page_for_writepage(wbc, page); +	return AOP_WRITEPAGE_ACTIVATE;  } -#define MAX_DESIRED_PAGES_WP	4096 -  static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,  			void *data)  { @@ -604,17 +879,20 @@ static int f2fs_write_data_pages(struct address_space *mapping,  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	bool locked = false;  	int ret; -	long excess_nrtw = 0, desired_nrtw; +	long diff; + +	trace_f2fs_writepages(mapping->host, wbc, DATA);  	/* deal with chardevs and other special file */  	if (!mapping->a_ops->writepage)  		return 0; -	if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { -		desired_nrtw = MAX_DESIRED_PAGES_WP; -		excess_nrtw = desired_nrtw - wbc->nr_to_write; -		wbc->nr_to_write = desired_nrtw; -	} +	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && +			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && +			available_free_memory(sbi, DIRTY_DENTS)) +		goto skip_write; + +	diff = nr_pages_to_write(sbi, DATA, wbc);  	if (!S_ISDIR(inode->i_mode)) {  		mutex_lock(&sbi->writepages); @@ -623,12 +901,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,  	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);  	if (locked)  		mutex_unlock(&sbi->writepages); -	f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + +	f2fs_submit_merged_bio(sbi, DATA, WRITE);  	remove_dirty_dir_inode(inode); -	wbc->nr_to_write -= excess_nrtw; +	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);  	return ret; + +skip_write: +	wbc->pages_skipped += get_dirty_dents(inode); +	return 0;  }  static int f2fs_write_begin(struct file *file, struct address_space *mapping, @@ -641,30 +924,44 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,  	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;  	struct dnode_of_data dn;  	int err = 0; -	int ilock; + +	trace_f2fs_write_begin(inode, pos, len, flags);  	f2fs_balance_fs(sbi);  repeat: +	err = f2fs_convert_inline_data(inode, pos + len); +	if (err) +		return err; +  	page = grab_cache_page_write_begin(mapping, index, flags);  	if (!page)  		return -ENOMEM; + +	/* to avoid latency during memory pressure */ +	unlock_page(page); +  	*pagep = page; -	ilock = mutex_lock_op(sbi); +	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) +		goto inline_data; +	f2fs_lock_op(sbi);  	set_new_dnode(&dn, inode, NULL, NULL, 0); -	err = get_dnode_of_data(&dn, index, ALLOC_NODE); -	if (err) -		goto err; +	err = f2fs_reserve_block(&dn, index); +	f2fs_unlock_op(sbi); -	if (dn.data_blkaddr == NULL_ADDR) -		err = reserve_new_block(&dn); - -	f2fs_put_dnode(&dn); -	if (err) -		goto err; +	if (err) { +		f2fs_put_page(page, 0); +		return err; +	} +inline_data: +	lock_page(page); +	if (unlikely(page->mapping != mapping)) { +		f2fs_put_page(page, 1); +		goto repeat; +	} -	mutex_unlock_op(sbi, ilock); +	f2fs_wait_on_page_writeback(page, DATA);  	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))  		return 0; @@ -681,15 +978,25 @@ repeat:  	if (dn.data_blkaddr == NEW_ADDR) {  		zero_user_segment(page, 0, PAGE_CACHE_SIZE);  	} else { -		err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); -		if (err) -			return err; +		if (f2fs_has_inline_data(inode)) { +			err = f2fs_read_inline_data(inode, page); +			if (err) { +				page_cache_release(page); +				return err; +			} +		} else { +			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, +							READ_SYNC); +			if (err) +				return err; +		} +  		lock_page(page); -		if (!PageUptodate(page)) { +		if (unlikely(!PageUptodate(page))) {  			f2fs_put_page(page, 1);  			return -EIO;  		} -		if (page->mapping != mapping) { +		if (unlikely(page->mapping != mapping)) {  			f2fs_put_page(page, 1);  			goto repeat;  		} @@ -698,11 +1005,6 @@ out:  	SetPageUptodate(page);  	clear_cold_data(page);  	return 0; - -err: -	mutex_unlock_op(sbi, ilock); -	f2fs_put_page(page, 1); -	return err;  }  static int f2fs_write_end(struct file *file, @@ -712,6 +1014,8 @@ static int f2fs_write_end(struct file *file,  {  	struct inode *inode = page->mapping->host; +	trace_f2fs_write_end(inode, pos, len, copied); +  	SetPageUptodate(page);  	set_page_dirty(page); @@ -721,34 +1025,53 @@ static int f2fs_write_end(struct file *file,  		update_inode_page(inode);  	} -	unlock_page(page); -	page_cache_release(page); +	f2fs_put_page(page, 1);  	return copied;  } +static int check_direct_IO(struct inode *inode, int rw, +		struct iov_iter *iter, loff_t offset) +{ +	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + +	if (rw == READ) +		return 0; + +	if (offset & blocksize_mask) +		return -EINVAL; + +	if (iov_iter_alignment(iter) & blocksize_mask) +		return -EINVAL; + +	return 0; +} +  static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, -		const struct iovec *iov, loff_t offset, unsigned long nr_segs) +		struct iov_iter *iter, loff_t offset)  {  	struct file *file = iocb->ki_filp;  	struct inode *inode = file->f_mapping->host; -	if (rw == WRITE) +	/* Let buffer I/O handle the inline data case. */ +	if (f2fs_has_inline_data(inode)) +		return 0; + +	if (check_direct_IO(inode, rw, iter, offset))  		return 0; -	/* Needs synchronization with the cleaner */ -	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, -						  get_data_block_ro); +	/* clear fsync mark to recover these blocks */ +	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); + +	return blockdev_direct_IO(rw, iocb, inode, iter, offset, +				  get_data_block);  }  static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,  				      unsigned int length)  {  	struct inode *inode = page->mapping->host; -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	if (S_ISDIR(inode->i_mode) && PageDirty(page)) { -		dec_page_count(sbi, F2FS_DIRTY_DENTS); +	if (PageDirty(page))  		inode_dec_dirty_dents(inode); -	}  	ClearPagePrivate(page);  } @@ -763,7 +1086,11 @@ static int f2fs_set_data_page_dirty(struct page *page)  	struct address_space *mapping = page->mapping;  	struct inode *inode = mapping->host; +	trace_f2fs_set_page_dirty(page, DATA); +  	SetPageUptodate(page); +	mark_inode_dirty(inode); +  	if (!PageDirty(page)) {  		__set_page_dirty_nobuffers(page);  		set_dirty_dir_page(inode, page); @@ -774,7 +1101,12 @@ static int f2fs_set_data_page_dirty(struct page *page)  static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)  { -	return generic_block_bmap(mapping, block, get_data_block_ro); +	struct inode *inode = mapping->host; + +	if (f2fs_has_inline_data(inode)) +		return 0; + +	return generic_block_bmap(mapping, block, get_data_block);  }  const struct address_space_operations f2fs_dblock_aops = { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a84b0a8e685..b52c12cf587 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -24,7 +24,7 @@  #include "gc.h"  static LIST_HEAD(f2fs_stat_list); -static struct dentry *debugfs_root; +static struct dentry *f2fs_debugfs_root;  static DEFINE_MUTEX(f2fs_stat_mutex);  static void update_general_status(struct f2fs_sb_info *sbi) @@ -45,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)  	si->valid_count = valid_user_blocks(sbi);  	si->valid_node_count = valid_node_count(sbi);  	si->valid_inode_count = valid_inode_count(sbi); +	si->inline_inode = sbi->inline_inode;  	si->utilization = utilization(sbi);  	si->free_segs = free_segments(sbi);  	si->free_secs = free_sections(sbi);  	si->prefree_count = prefree_segments(sbi);  	si->dirty_count = dirty_segments(sbi); -	si->node_pages = sbi->node_inode->i_mapping->nrpages; -	si->meta_pages = sbi->meta_inode->i_mapping->nrpages; +	si->node_pages = NODE_MAPPING(sbi)->nrpages; +	si->meta_pages = META_MAPPING(sbi)->nrpages;  	si->nats = NM_I(sbi)->nat_cnt;  	si->sits = SIT_I(sbi)->dirty_sentries;  	si->fnids = NM_I(sbi)->fcnt; @@ -85,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)  {  	struct f2fs_stat_info *si = F2FS_STAT(sbi);  	unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; -	struct sit_info *sit_i = SIT_I(sbi);  	unsigned int segno, vblocks;  	int ndirty = 0; @@ -93,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)  	total_vblocks = 0;  	blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);  	hblks_per_sec = blks_per_sec / 2; -	mutex_lock(&sit_i->sentry_lock);  	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {  		vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);  		dist = abs(vblocks - hblks_per_sec); @@ -104,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)  			ndirty++;  		}  	} -	mutex_unlock(&sit_i->sentry_lock);  	dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;  	si->bimodal = bimodal / dist;  	if (si->dirty_count) @@ -165,9 +163,9 @@ get_cache:  	/* free nids */  	si->cache_mem = NM_I(sbi)->fcnt;  	si->cache_mem += NM_I(sbi)->nat_cnt; -	npages = sbi->node_inode->i_mapping->nrpages; +	npages = NODE_MAPPING(sbi)->nrpages;  	si->cache_mem += npages << PAGE_CACHE_SHIFT; -	npages = sbi->meta_inode->i_mapping->nrpages; +	npages = META_MAPPING(sbi)->nrpages;  	si->cache_mem += npages << PAGE_CACHE_SHIFT;  	si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);  	si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); @@ -200,6 +198,8 @@ static int stat_show(struct seq_file *s, void *v)  		seq_printf(s, "Other: %u)\n  - Data: %u\n",  			   si->valid_node_count - si->valid_inode_count,  			   si->valid_count - si->valid_node_count); +		seq_printf(s, "  - Inline_data Inode: %u\n", +			   si->inline_inode);  		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",  			   si->main_area_segs, si->main_area_sections,  			   si->main_area_zones); @@ -233,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)  			   si->dirty_count);  		seq_printf(s, "  - Prefree: %d\n  - Free: %d (%d)\n\n",  			   si->prefree_count, si->free_segs, si->free_secs); +		seq_printf(s, "CP calls: %d\n", si->cp_count);  		seq_printf(s, "GC calls: %d (BG: %d)\n",  			   si->call_count, si->bg_gc);  		seq_printf(s, "  - data segments : %d\n", si->data_segs); @@ -242,17 +243,17 @@ static int stat_show(struct seq_file *s, void *v)  		seq_printf(s, "  - node blocks : %d\n", si->node_blks);  		seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",  			   si->hit_ext, si->total_ext); -		seq_printf(s, "\nBalancing F2FS Async:\n"); -		seq_printf(s, "  - nodes %4d in %4d\n", +		seq_puts(s, "\nBalancing F2FS Async:\n"); +		seq_printf(s, "  - nodes: %4d in %4d\n",  			   si->ndirty_node, si->node_pages); -		seq_printf(s, "  - dents %4d in dirs:%4d\n", +		seq_printf(s, "  - dents: %4d in dirs:%4d\n",  			   si->ndirty_dent, si->ndirty_dirs); -		seq_printf(s, "  - meta %4d in %4d\n", +		seq_printf(s, "  - meta: %4d in %4d\n",  			   si->ndirty_meta, si->meta_pages); -		seq_printf(s, "  - NATs %5d > %lu\n", -			   si->nats, NM_WOUT_THRESHOLD); -		seq_printf(s, "  - SITs: %5d\n  - free_nids: %5d\n", -			   si->sits, si->fnids); +		seq_printf(s, "  - NATs: %9d\n  - SITs: %9d\n", +			   si->nats, si->sits); +		seq_printf(s, "  - free_nids: %9d\n", +			   si->fnids);  		seq_puts(s, "\nDistribution of User Blocks:");  		seq_puts(s, " [ valid | invalid | free ]\n");  		seq_puts(s, "  ["); @@ -340,14 +341,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)  void __init f2fs_create_root_stats(void)  { -	debugfs_root = debugfs_create_dir("f2fs", NULL); -	if (debugfs_root) -		debugfs_create_file("status", S_IRUGO, debugfs_root, -					 NULL, &stat_fops); +	struct dentry *file; + +	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); +	if (!f2fs_debugfs_root) +		goto bail; + +	file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, +			NULL, &stat_fops); +	if (!file) +		goto free_debugfs_dir; + +	return; + +free_debugfs_dir: +	debugfs_remove(f2fs_debugfs_root); + +bail: +	f2fs_debugfs_root = NULL; +	return;  }  void f2fs_destroy_root_stats(void)  { -	debugfs_remove_recursive(debugfs_root); -	debugfs_root = NULL; +	if (!f2fs_debugfs_root) +		return; + +	debugfs_remove_recursive(f2fs_debugfs_root); +	f2fs_debugfs_root = NULL;  } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 384c6daf9a8..a4addd72ebb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)  							>> PAGE_CACHE_SHIFT;  } -static unsigned int dir_buckets(unsigned int level) +static unsigned int dir_buckets(unsigned int level, int dir_level)  { -	if (level < MAX_DIR_HASH_DEPTH / 2) -		return 1 << level; +	if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) +		return 1 << (level + dir_level);  	else -		return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); +		return MAX_DIR_BUCKETS;  }  static unsigned int bucket_blocks(unsigned int level) @@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)  	de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];  } -static unsigned long dir_block_index(unsigned int level, unsigned int idx) +static unsigned long dir_block_index(unsigned int level, +				int dir_level, unsigned int idx)  {  	unsigned long i;  	unsigned long bidx = 0;  	for (i = 0; i < level; i++) -		bidx += dir_buckets(i) * bucket_blocks(i); +		bidx += dir_buckets(i, dir_level) * bucket_blocks(i);  	bidx += idx * bucket_blocks(level);  	return bidx;  } @@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,  			f2fs_hash_t namehash, struct page **res_page)  {  	struct f2fs_dir_entry *de; -	unsigned long bit_pos, end_pos, next_pos; +	unsigned long bit_pos = 0;  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); -	int slots; +	const void *dentry_bits = &dentry_blk->dentry_bitmap; +	int max_len = 0; -	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, -					NR_DENTRY_IN_BLOCK, 0);  	while (bit_pos < NR_DENTRY_IN_BLOCK) { +		if (!test_bit_le(bit_pos, dentry_bits)) { +			if (bit_pos == 0) +				max_len = 1; +			else if (!test_bit_le(bit_pos - 1, dentry_bits)) +				max_len++; +			bit_pos++; +			continue; +		}  		de = &dentry_blk->dentry[bit_pos]; -		slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); -  		if (early_match_name(name, namelen, namehash, de)) {  			if (!memcmp(dentry_blk->filename[bit_pos],  							name, namelen)) { @@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,  				goto found;  			}  		} -		next_pos = bit_pos + slots; -		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, -				NR_DENTRY_IN_BLOCK, next_pos); -		if (bit_pos >= NR_DENTRY_IN_BLOCK) -			end_pos = NR_DENTRY_IN_BLOCK; -		else -			end_pos = bit_pos; -		if (*max_slots < end_pos - next_pos) -			*max_slots = end_pos - next_pos; +		if (max_len > *max_slots) { +			*max_slots = max_len; +			max_len = 0; +		} +		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));  	}  	de = NULL;  	kunmap(dentry_page);  found: +	if (max_len > *max_slots) +		*max_slots = max_len;  	return de;  } @@ -139,12 +143,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,  	bool room = false;  	int max_slots = 0; -	BUG_ON(level > MAX_DIR_HASH_DEPTH); +	f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); -	nbucket = dir_buckets(level); +	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);  	nblock = bucket_blocks(level); -	bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); +	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, +					le32_to_cpu(namehash) % nbucket);  	end_block = bidx + nblock;  	for (; bidx < end_block; bidx++) { @@ -190,9 +195,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,  	unsigned int max_depth;  	unsigned int level; -	if (namelen > F2FS_NAME_LEN) -		return NULL; -  	if (npages == 0)  		return NULL; @@ -251,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,  		struct page *page, struct inode *inode)  {  	lock_page(page); -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, DATA);  	de->ino = cpu_to_le32(inode->i_ino);  	set_de_type(de, inode);  	kunmap(page); @@ -259,20 +261,19 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,  	dir->i_mtime = dir->i_ctime = CURRENT_TIME;  	mark_inode_dirty(dir); -	/* update parent inode number before releasing dentry page */ -	F2FS_I(inode)->i_pino = dir->i_ino; -  	f2fs_put_page(page, 1);  }  static void init_dent_inode(const struct qstr *name, struct page *ipage)  { -	struct f2fs_node *rn; +	struct f2fs_inode *ri; + +	f2fs_wait_on_page_writeback(ipage, NODE);  	/* copy name info. to this inode page */ -	rn = F2FS_NODE(ipage); -	rn->i.i_namelen = cpu_to_le32(name->len); -	memcpy(rn->i.i_name, name->name, name->len); +	ri = F2FS_INODE(ipage); +	ri->i_namelen = cpu_to_le32(name->len); +	memcpy(ri->i_name, name->name, name->len);  	set_page_dirty(ipage);  } @@ -346,21 +347,18 @@ static struct page *init_inode_metadata(struct inode *inode,  				goto error;  		} -		err = f2fs_init_acl(inode, dir); +		err = f2fs_init_acl(inode, dir, page);  		if (err) -			goto error; +			goto put_error;  		err = f2fs_init_security(inode, dir, name, page);  		if (err) -			goto error; - -		wait_on_page_writeback(page); +			goto put_error;  	} else {  		page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);  		if (IS_ERR(page))  			return page; -		wait_on_page_writeback(page);  		set_cold_node(inode, page);  	} @@ -376,8 +374,13 @@ static struct page *init_inode_metadata(struct inode *inode,  	}  	return page; -error: +put_error:  	f2fs_put_page(page, 1); +error: +	/* once the failed inode becomes a bad inode, i_mode is S_IFREG */ +	truncate_inode_pages(&inode->i_data, 0); +	truncate_blocks(inode, 0); +	remove_dirty_dir_inode(inode);  	remove_inode_page(inode);  	return ERR_PTR(err);  } @@ -393,16 +396,13 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,  		clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);  	}  	dir->i_mtime = dir->i_ctime = CURRENT_TIME; +	mark_inode_dirty(dir); +  	if (F2FS_I(dir)->i_current_depth != current_depth) {  		F2FS_I(dir)->i_current_depth = current_depth;  		set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);  	} -	if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) -		update_inode_page(dir); -	else -		mark_inode_dirty(dir); -  	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))  		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);  } @@ -432,10 +432,11 @@ next:  }  /* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). + * Caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op().   */ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) +int __f2fs_add_link(struct inode *dir, const struct qstr *name, +						struct inode *inode)  {  	unsigned int bit_pos;  	unsigned int level; @@ -461,17 +462,18 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in  	}  start: -	if (current_depth == MAX_DIR_HASH_DEPTH) +	if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))  		return -ENOSPC;  	/* Increase the depth, if required */  	if (level == current_depth)  		++current_depth; -	nbucket = dir_buckets(level); +	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);  	nblock = bucket_blocks(level); -	bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); +	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, +				(le32_to_cpu(dentry_hash) % nbucket));  	for (block = bidx; block <= (bidx + nblock - 1); block++) {  		dentry_page = get_new_data_page(dir, NULL, block, true); @@ -491,8 +493,9 @@ start:  	++level;  	goto start;  add_dentry: -	wait_on_page_writeback(dentry_page); +	f2fs_wait_on_page_writeback(dentry_page, DATA); +	down_write(&F2FS_I(inode)->i_sem);  	page = init_inode_metadata(inode, dir, name);  	if (IS_ERR(page)) {  		err = PTR_ERR(page); @@ -515,7 +518,12 @@ add_dentry:  	update_parent_metadata(dir, inode, current_depth);  fail: -	clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); +	up_write(&F2FS_I(inode)->i_sem); + +	if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { +		update_inode_page(dir); +		clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); +	}  	kunmap(dentry_page);  	f2fs_put_page(dentry_page, 1);  	return err; @@ -532,13 +540,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,  	unsigned int bit_pos;  	struct address_space *mapping = page->mapping;  	struct inode *dir = mapping->host; -	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));  	void *kaddr = page_address(page);  	int i;  	lock_page(page); -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, DATA);  	dentry_blk = (struct f2fs_dentry_block *)kaddr;  	bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; @@ -554,20 +561,22 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,  	dir->i_ctime = dir->i_mtime = CURRENT_TIME; -	if (inode && S_ISDIR(inode->i_mode)) { -		drop_nlink(dir); -		update_inode_page(dir); -	} else { -		mark_inode_dirty(dir); -	} -  	if (inode) { +		struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + +		down_write(&F2FS_I(inode)->i_sem); + +		if (S_ISDIR(inode->i_mode)) { +			drop_nlink(dir); +			update_inode_page(dir); +		}  		inode->i_ctime = CURRENT_TIME;  		drop_nlink(inode);  		if (S_ISDIR(inode->i_mode)) {  			drop_nlink(inode);  			i_size_write(inode, 0);  		} +		up_write(&F2FS_I(inode)->i_sem);  		update_inode_page(inode);  		if (inode->i_nlink == 0) @@ -580,7 +589,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,  		truncate_hole(dir, page->index, page->index + 1);  		clear_page_dirty_for_io(page);  		ClearPageUptodate(page); -		dec_page_count(sbi, F2FS_DIRTY_DENTS);  		inode_dec_dirty_dents(dir);  	}  	f2fs_put_page(page, 1); @@ -631,12 +639,18 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)  	struct f2fs_dentry_block *dentry_blk = NULL;  	struct f2fs_dir_entry *de = NULL;  	struct page *dentry_page = NULL; +	struct file_ra_state *ra = &file->f_ra;  	unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);  	unsigned char d_type = DT_UNKNOWN;  	bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); -	for ( ; n < npages; n++) { +	/* readahead for multi pages of dir */ +	if (npages - n > 1 && !ra_has_index(ra, n)) +		page_cache_sync_readahead(inode->i_mapping, ra, file, n, +				min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); + +	for (; n < npages; n++) {  		dentry_page = get_lock_data_page(inode, n);  		if (IS_ERR(dentry_page))  			continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 608f0df5b91..58df97e174d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -18,6 +18,15 @@  #include <linux/crc32.h>  #include <linux/magic.h>  #include <linux/kobject.h> +#include <linux/sched.h> + +#ifdef CONFIG_F2FS_CHECK_FS +#define f2fs_bug_on(condition)	BUG_ON(condition) +#define f2fs_down_write(x, y)	down_write_nest_lock(x, y) +#else +#define f2fs_bug_on(condition) +#define f2fs_down_write(x, y)	down_write(x) +#endif  /*   * For mount options @@ -30,6 +39,8 @@  #define F2FS_MOUNT_POSIX_ACL		0x00000020  #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY	0x00000040  #define F2FS_MOUNT_INLINE_XATTR		0x00000080 +#define F2FS_MOUNT_INLINE_DATA		0x00000100 +#define F2FS_MOUNT_FLUSH_MERGE		0x00000200  #define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)  #define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -78,6 +89,16 @@ enum {  	SIT_BITMAP  }; +/* + * For CP/NAT/SIT/SSA readahead + */ +enum { +	META_CP, +	META_NAT, +	META_SIT, +	META_SSA +}; +  /* for the list of orphan inodes */  struct orphan_inode_entry {  	struct list_head list;	/* list head */ @@ -90,6 +111,13 @@ struct dir_inode_entry {  	struct inode *inode;	/* vfs inode pointer */  }; +/* for the list of blockaddresses to be discarded */ +struct discard_entry { +	struct list_head list;	/* list head */ +	block_t blkaddr;	/* block address to be discarded */ +	int len;		/* # of consecutive blocks of the discard */ +}; +  /* for the list of fsync inodes, used only during recovery */  struct fsync_inode_entry {  	struct list_head list;	/* list head */ @@ -148,13 +176,17 @@ enum {  	LOOKUP_NODE,			/* look up a node without readahead */  	LOOKUP_NODE_RA,			/*  					 * look up a node with readahead called -					 * by get_datablock_ro. +					 * by get_data_block.  					 */  };  #define F2FS_LINK_MAX		32000	/* maximum link count per file */ +#define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */ +  /* for in-memory extent cache entry */ +#define F2FS_MIN_EXTENT_LEN	16	/* minimum extent length */ +  struct extent_info {  	rwlock_t ext_lock;	/* rwlock for consistency */  	unsigned int fofs;	/* start offset in a file */ @@ -168,22 +200,27 @@ struct extent_info {  #define FADVISE_COLD_BIT	0x01  #define FADVISE_LOST_PINO_BIT	0x02 +#define DEF_DIR_LEVEL		0 +  struct f2fs_inode_info {  	struct inode vfs_inode;		/* serve a vfs inode */  	unsigned long i_flags;		/* keep an inode flags for ioctl */  	unsigned char i_advise;		/* use to give file attribute hints */ +	unsigned char i_dir_level;	/* use for dentry level for large dir */  	unsigned int i_current_depth;	/* use only in directory structure */  	unsigned int i_pino;		/* parent inode number */  	umode_t i_acl_mode;		/* keep file acl mode temporarily */  	/* Use below internally in f2fs*/  	unsigned long flags;		/* use to pass per-file flags */ +	struct rw_semaphore i_sem;	/* protect fi info */  	atomic_t dirty_dents;		/* # of dirty dentry pages */  	f2fs_hash_t chash;		/* hash value of given file name */  	unsigned int clevel;		/* maximum level of given file name */  	nid_t i_xattr_nid;		/* node id that contains xattrs */  	unsigned long long xattr_ver;	/* cp version of xattr modification */  	struct extent_info ext;		/* in-memory extent cache entry */ +	struct dir_inode_entry *dirty_dir;	/* the pointer of dirty dir */  };  static inline void get_extent_info(struct extent_info *ext, @@ -209,7 +246,9 @@ static inline void set_raw_extent(struct extent_info *ext,  struct f2fs_nm_info {  	block_t nat_blkaddr;		/* base disk address of NAT */  	nid_t max_nid;			/* maximum possible node ids */ +	nid_t available_nids;		/* maximum available node ids */  	nid_t next_scan_nid;		/* the next nid to be scanned */ +	unsigned int ram_thresh;	/* control the memory footprint */  	/* NAT cache management */  	struct radix_tree_root nat_root;/* root of the nat entry cache */ @@ -219,6 +258,7 @@ struct f2fs_nm_info {  	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */  	/* free node ids management */ +	struct radix_tree_root free_nid_root;/* root of the free_nid cache */  	struct list_head free_nid_list;	/* a list for free nids */  	spinlock_t free_nid_list_lock;	/* protect free nid list */  	unsigned int fcnt;		/* the number of free node id */ @@ -281,15 +321,27 @@ enum {  	NO_CHECK_TYPE  }; +struct flush_cmd { +	struct flush_cmd *next; +	struct completion wait; +	int ret; +}; + +struct flush_cmd_control { +	struct task_struct *f2fs_issue_flush;	/* flush thread */ +	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */ +	struct flush_cmd *issue_list;		/* list for command issue */ +	struct flush_cmd *dispatch_list;	/* list for command dispatch */ +	spinlock_t issue_lock;			/* for issue list lock */ +	struct flush_cmd *issue_tail;		/* list tail of issue list */ +}; +  struct f2fs_sm_info {  	struct sit_info *sit_info;		/* whole segment information */  	struct free_segmap_info *free_info;	/* free segment information */  	struct dirty_seglist_info *dirty_info;	/* dirty segment information */  	struct curseg_info *curseg_array;	/* active segment information */ -	struct list_head wblist_head;	/* list of under-writeback pages */ -	spinlock_t wblist_lock;		/* lock for checkpoint */ -  	block_t seg0_blkaddr;		/* block address of 0'th segment */  	block_t main_blkaddr;		/* start block address of main area */  	block_t ssa_blkaddr;		/* start block address of SSA area */ @@ -298,6 +350,21 @@ struct f2fs_sm_info {  	unsigned int main_segments;	/* # of segments in main area */  	unsigned int reserved_segments;	/* # of reserved segments */  	unsigned int ovp_segments;	/* # of overprovision segments */ + +	/* a threshold to reclaim prefree segments */ +	unsigned int rec_prefree_segments; + +	/* for small discard management */ +	struct list_head discard_list;		/* 4KB discard list */ +	int nr_discards;			/* # of discards in the list */ +	int max_discards;			/* max. discards to be issued */ + +	unsigned int ipu_policy;	/* in-place-update policy */ +	unsigned int min_ipu_util;	/* in-place-update threshold */ + +	/* for flush command control */ +	struct flush_cmd_control *cmd_control_info; +  };  /* @@ -318,14 +385,6 @@ enum count_type {  };  /* - * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. - * The checkpoint procedure blocks all the locks in this fs_lock array. - * Some FS operations grab free locks, and if there is no free lock, - * then wait to grab a lock in a round-robin manner. - */ -#define NR_GLOBAL_LOCKS	8 - -/*   * The below are the page types of bios used in submti_bio().   * The available types are:   * DATA			User data pages. It operates as async mode. @@ -336,6 +395,7 @@ enum count_type {   *			with waiting the bio's completion   * ...			Only can be used with META.   */ +#define PAGE_TYPE_OF_BIO(type)	((type) > META ? META : (type))  enum page_type {  	DATA,  	NODE, @@ -344,6 +404,20 @@ enum page_type {  	META_FLUSH,  }; +struct f2fs_io_info { +	enum page_type type;	/* contains DATA/NODE/META/META_FLUSH */ +	int rw;			/* contains R/RS/W/WS with REQ_META/REQ_PRIO */ +}; + +#define is_read_io(rw)	(((rw) & 1) == READ) +struct f2fs_bio_info { +	struct f2fs_sb_info *sbi;	/* f2fs superblock */ +	struct bio *bio;		/* bios to merge */ +	sector_t last_block_in_bio;	/* last block number */ +	struct f2fs_io_info fio;	/* store buffered io info. */ +	struct rw_semaphore io_rwsem;	/* blocking op for bio */ +}; +  struct f2fs_sb_info {  	struct super_block *sb;			/* pointer to VFS super block */  	struct proc_dir_entry *s_proc;		/* proc entry */ @@ -357,25 +431,27 @@ struct f2fs_sb_info {  	/* for segment-related operations */  	struct f2fs_sm_info *sm_info;		/* segment manager */ -	struct bio *bio[NR_PAGE_TYPE];		/* bios to merge */ -	sector_t last_block_in_bio[NR_PAGE_TYPE];	/* last block number */ -	struct rw_semaphore bio_sem;		/* IO semaphore */ + +	/* for bio operations */ +	struct f2fs_bio_info read_io;			/* for read bios */ +	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */ +	struct completion *wait_io;		/* for completion bios */  	/* for checkpoint */  	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */  	struct inode *meta_inode;		/* cache meta blocks */  	struct mutex cp_mutex;			/* checkpoint procedure lock */ -	struct mutex fs_lock[NR_GLOBAL_LOCKS];	/* blocking FS operations */ +	struct rw_semaphore cp_rwsem;		/* blocking FS operations */  	struct mutex node_write;		/* locking node writes */  	struct mutex writepages;		/* mutex for writepages() */ -	unsigned char next_lock_num;		/* round-robin global locks */ -	int por_doing;				/* recovery is doing or not */ -	int on_build_free_nids;			/* build_free_nids is doing */ +	bool por_doing;				/* recovery is doing or not */ +	wait_queue_head_t cp_wait;  	/* for orphan inode management */  	struct list_head orphan_inode_list;	/* orphan inode list */ -	struct mutex orphan_inode_mutex;	/* for orphan inode list */ +	spinlock_t orphan_inode_lock;		/* for orphan inode list */  	unsigned int n_orphans;			/* # of orphan inodes */ +	unsigned int max_orphans;		/* max orphan inodes */  	/* for directory inode management */  	struct list_head dir_inode_list;	/* dir inode list */ @@ -397,6 +473,7 @@ struct f2fs_sb_info {  	unsigned int total_valid_node_count;	/* valid node block count */  	unsigned int total_valid_inode_count;	/* valid inode count */  	int active_logs;			/* # of active logs */ +	int dir_level;				/* directory level */  	block_t user_block_count;		/* # of user blocks */  	block_t total_valid_block_count;	/* # of valid blocks */ @@ -412,6 +489,9 @@ struct f2fs_sb_info {  	struct f2fs_gc_kthread	*gc_thread;	/* GC thread */  	unsigned int cur_victim_sec;		/* current victim section num */ +	/* maximum # of trials to find a victim segment for SSR and GC */ +	unsigned int max_victim_search; +  	/*  	 * for stat information.  	 * one is for the LFS mode, and the other is for the SSR mode. @@ -421,6 +501,7 @@ struct f2fs_sb_info {  	unsigned int segment_count[2];		/* # of allocated segments */  	unsigned int block_count[2];		/* # of allocated blocks */  	int total_hit_ext, read_hit_ext;	/* extent cache hit ratio */ +	int inline_inode;			/* # of inline_data inodes */  	int bg_gc;				/* background gc calls */  	unsigned int n_dirty_dirs;		/* # of dir inodes */  #endif @@ -460,6 +541,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page)  	return (struct f2fs_node *)page_address(page);  } +static inline struct f2fs_inode *F2FS_INODE(struct page *page) +{ +	return &((struct f2fs_node *)page_address(page))->i; +} +  static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)  {  	return (struct f2fs_nm_info *)(sbi->nm_info); @@ -485,6 +571,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)  	return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);  } +static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi) +{ +	return sbi->meta_inode->i_mapping; +} + +static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) +{ +	return sbi->node_inode->i_mapping; +} +  static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)  {  	sbi->s_dirty = 1; @@ -520,48 +616,24 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)  	cp->ckpt_flags = cpu_to_le32(ckpt_flags);  } -static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)  { -	int i; - -	for (i = 0; i < NR_GLOBAL_LOCKS; i++) { -		/* -		 * This is the only time we take multiple fs_lock[] -		 * instances; the order is immaterial since we -		 * always hold cp_mutex, which serializes multiple -		 * such operations. -		 */ -		mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); -	} +	down_read(&sbi->cp_rwsem);  } -static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) +static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)  { -	int i = 0; -	for (; i < NR_GLOBAL_LOCKS; i++) -		mutex_unlock(&sbi->fs_lock[i]); +	up_read(&sbi->cp_rwsem);  } -static inline int mutex_lock_op(struct f2fs_sb_info *sbi) +static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)  { -	unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; -	int i = 0; - -	for (; i < NR_GLOBAL_LOCKS; i++) -		if (mutex_trylock(&sbi->fs_lock[i])) -			return i; - -	mutex_lock(&sbi->fs_lock[next_lock]); -	sbi->next_lock_num++; -	return next_lock; +	f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);  } -static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)  { -	if (ilock < 0) -		return; -	BUG_ON(ilock >= NR_GLOBAL_LOCKS); -	mutex_unlock(&sbi->fs_lock[ilock]); +	up_write(&sbi->cp_rwsem);  }  /* @@ -569,8 +641,9 @@ static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)   */  static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)  { -	WARN_ON((nid >= NM_I(sbi)->max_nid)); -	if (nid >= NM_I(sbi)->max_nid) +	if (unlikely(nid < F2FS_ROOT_INO(sbi))) +		return -EINVAL; +	if (unlikely(nid >= NM_I(sbi)->max_nid))  		return -EINVAL;  	return 0;  } @@ -583,9 +656,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)  static inline int F2FS_HAS_BLOCKS(struct inode *inode)  {  	if (F2FS_I(inode)->i_xattr_nid) -		return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); +		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;  	else -		return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; +} + +static inline bool f2fs_has_xattr_block(unsigned int ofs) +{ +	return ofs == XATTR_NODE_OFFSET;  }  static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, @@ -596,7 +674,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,  	spin_lock(&sbi->stat_lock);  	valid_block_count =  		sbi->total_valid_block_count + (block_t)count; -	if (valid_block_count > sbi->user_block_count) { +	if (unlikely(valid_block_count > sbi->user_block_count)) {  		spin_unlock(&sbi->stat_lock);  		return false;  	} @@ -607,17 +685,16 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,  	return true;  } -static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, +static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,  						struct inode *inode,  						blkcnt_t count)  {  	spin_lock(&sbi->stat_lock); -	BUG_ON(sbi->total_valid_block_count < (block_t) count); -	BUG_ON(inode->i_blocks < count); +	f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); +	f2fs_bug_on(inode->i_blocks < count);  	inode->i_blocks -= count;  	sbi->total_valid_block_count -= (block_t)count;  	spin_unlock(&sbi->stat_lock); -	return 0;  }  static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -628,6 +705,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)  static inline void inode_inc_dirty_dents(struct inode *inode)  { +	inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);  	atomic_inc(&F2FS_I(inode)->dirty_dents);  } @@ -638,6 +716,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)  static inline void inode_dec_dirty_dents(struct inode *inode)  { +	if (!S_ISDIR(inode->i_mode)) +		return; + +	dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);  	atomic_dec(&F2FS_I(inode)->dirty_dents);  } @@ -646,6 +728,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)  	return atomic_read(&sbi->nr_pages[count_type]);  } +static inline int get_dirty_dents(struct inode *inode) +{ +	return atomic_read(&F2FS_I(inode)->dirty_dents); +} +  static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)  {  	unsigned int pages_per_sec = sbi->segs_per_sec * @@ -656,11 +743,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)  static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)  { -	block_t ret; -	spin_lock(&sbi->stat_lock); -	ret = sbi->total_valid_block_count; -	spin_unlock(&sbi->stat_lock); -	return ret; +	return sbi->total_valid_block_count;  }  static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) @@ -679,9 +762,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)  static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)  {  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); -	int offset = (flag == NAT_BITMAP) ? +	int offset; + +	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { +		if (flag == NAT_BITMAP) +			return &ckpt->sit_nat_version_bitmap; +		else +			return ((unsigned char *)ckpt + F2FS_BLKSIZE); +	} else { +		offset = (flag == NAT_BITMAP) ?  			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; -	return &ckpt->sit_nat_version_bitmap + offset; +		return &ckpt->sit_nat_version_bitmap + offset; +	}  }  static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) @@ -708,96 +800,85 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)  }  static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, -						struct inode *inode, -						unsigned int count) +						struct inode *inode)  {  	block_t	valid_block_count;  	unsigned int valid_node_count;  	spin_lock(&sbi->stat_lock); -	valid_block_count = sbi->total_valid_block_count + (block_t)count; -	sbi->alloc_valid_block_count += (block_t)count; -	valid_node_count = sbi->total_valid_node_count + count; - -	if (valid_block_count > sbi->user_block_count) { +	valid_block_count = sbi->total_valid_block_count + 1; +	if (unlikely(valid_block_count > sbi->user_block_count)) {  		spin_unlock(&sbi->stat_lock);  		return false;  	} -	if (valid_node_count > sbi->total_node_count) { +	valid_node_count = sbi->total_valid_node_count + 1; +	if (unlikely(valid_node_count > sbi->total_node_count)) {  		spin_unlock(&sbi->stat_lock);  		return false;  	}  	if (inode) -		inode->i_blocks += count; -	sbi->total_valid_node_count = valid_node_count; -	sbi->total_valid_block_count = valid_block_count; +		inode->i_blocks++; + +	sbi->alloc_valid_block_count++; +	sbi->total_valid_node_count++; +	sbi->total_valid_block_count++;  	spin_unlock(&sbi->stat_lock);  	return true;  }  static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, -						struct inode *inode, -						unsigned int count) +						struct inode *inode)  {  	spin_lock(&sbi->stat_lock); -	BUG_ON(sbi->total_valid_block_count < count); -	BUG_ON(sbi->total_valid_node_count < count); -	BUG_ON(inode->i_blocks < count); +	f2fs_bug_on(!sbi->total_valid_block_count); +	f2fs_bug_on(!sbi->total_valid_node_count); +	f2fs_bug_on(!inode->i_blocks); -	inode->i_blocks -= count; -	sbi->total_valid_node_count -= count; -	sbi->total_valid_block_count -= (block_t)count; +	inode->i_blocks--; +	sbi->total_valid_node_count--; +	sbi->total_valid_block_count--;  	spin_unlock(&sbi->stat_lock);  }  static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)  { -	unsigned int ret; -	spin_lock(&sbi->stat_lock); -	ret = sbi->total_valid_node_count; -	spin_unlock(&sbi->stat_lock); -	return ret; +	return sbi->total_valid_node_count;  }  static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)  {  	spin_lock(&sbi->stat_lock); -	BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); +	f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);  	sbi->total_valid_inode_count++;  	spin_unlock(&sbi->stat_lock);  } -static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)  {  	spin_lock(&sbi->stat_lock); -	BUG_ON(!sbi->total_valid_inode_count); +	f2fs_bug_on(!sbi->total_valid_inode_count);  	sbi->total_valid_inode_count--;  	spin_unlock(&sbi->stat_lock); -	return 0;  }  static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)  { -	unsigned int ret; -	spin_lock(&sbi->stat_lock); -	ret = sbi->total_valid_inode_count; -	spin_unlock(&sbi->stat_lock); -	return ret; +	return sbi->total_valid_inode_count;  }  static inline void f2fs_put_page(struct page *page, int unlock)  { -	if (!page || IS_ERR(page)) +	if (!page)  		return;  	if (unlock) { -		BUG_ON(!PageLocked(page)); +		f2fs_bug_on(!PageLocked(page));  		unlock_page(page);  	}  	page_cache_release(page); @@ -814,9 +895,23 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)  }  static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, -					size_t size, void (*ctor)(void *)) +					size_t size)  { -	return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +	return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL); +} + +static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, +						gfp_t flags) +{ +	void *entry; +retry: +	entry = kmem_cache_alloc(cachep, flags); +	if (!entry) { +		cond_resched(); +		goto retry; +	} + +	return entry;  }  #define RAW_IS_INODE(p)	((p)->footer.nid == (p)->footer.ino) @@ -879,12 +974,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)  enum {  	FI_NEW_INODE,		/* indicate newly allocated inode */  	FI_DIRTY_INODE,		/* indicate inode is dirty or not */ +	FI_DIRTY_DIR,		/* indicate directory has dirty pages */  	FI_INC_LINK,		/* need to increment i_nlink */  	FI_ACL_MODE,		/* indicate acl mode */  	FI_NO_ALLOC,		/* should not allocate any blocks */  	FI_UPDATE_DIR,		/* should update inode block for consistency */  	FI_DELAY_IPUT,		/* used for the recovery */ +	FI_NO_EXTENT,		/* not to use the extent cache */  	FI_INLINE_XATTR,	/* used for inline xattr */ +	FI_INLINE_DATA,		/* used for inline data*/  };  static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -922,6 +1020,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,  {  	if (ri->i_inline & F2FS_INLINE_XATTR)  		set_inode_flag(fi, FI_INLINE_XATTR); +	if (ri->i_inline & F2FS_INLINE_DATA) +		set_inode_flag(fi, FI_INLINE_DATA);  }  static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -931,41 +1031,75 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,  	if (is_inode_flag_set(fi, FI_INLINE_XATTR))  		ri->i_inline |= F2FS_INLINE_XATTR; +	if (is_inode_flag_set(fi, FI_INLINE_DATA)) +		ri->i_inline |= F2FS_INLINE_DATA; +} + +static inline int f2fs_has_inline_xattr(struct inode *inode) +{ +	return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);  }  static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)  { -	if (is_inode_flag_set(fi, FI_INLINE_XATTR)) +	if (f2fs_has_inline_xattr(&fi->vfs_inode))  		return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;  	return DEF_ADDRS_PER_INODE;  }  static inline void *inline_xattr_addr(struct page *page)  { -	struct f2fs_inode *ri; -	ri = (struct f2fs_inode *)page_address(page); +	struct f2fs_inode *ri = F2FS_INODE(page);  	return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -  					F2FS_INLINE_XATTR_ADDRS]);  }  static inline int inline_xattr_size(struct inode *inode)  { -	if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) +	if (f2fs_has_inline_xattr(inode))  		return F2FS_INLINE_XATTR_ADDRS << 2;  	else  		return 0;  } +static inline int f2fs_has_inline_data(struct inode *inode) +{ +	return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); +} + +static inline void *inline_data_addr(struct page *page) +{ +	struct f2fs_inode *ri = F2FS_INODE(page); +	return (void *)&(ri->i_addr[1]); +} +  static inline int f2fs_readonly(struct super_block *sb)  {  	return sb->s_flags & MS_RDONLY;  } +static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) +{ +	set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); +	sbi->sb->s_flags |= MS_RDONLY; +} + +#define get_inode_mode(i) \ +	((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ +	 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +/* get offset of first page in next direct node */ +#define PGOFS_OF_NEXT_DNODE(pgofs, fi)				\ +	((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) :	\ +	(pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) /	\ +	ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi)) +  /*   * file.c   */  int f2fs_sync_file(struct file *, loff_t, loff_t, int);  void truncate_data_blocks(struct dnode_of_data *); +int truncate_blocks(struct inode *, u64);  void f2fs_truncate(struct inode *);  int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);  int f2fs_setattr(struct dentry *, struct iattr *); @@ -979,8 +1113,9 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);   */  void f2fs_set_inode_flags(struct inode *);  struct inode *f2fs_iget(struct super_block *, unsigned long); +int try_to_free_nats(struct f2fs_sb_info *, int);  void update_inode(struct inode *, struct page *); -int update_inode_page(struct inode *); +void update_inode_page(struct inode *);  int f2fs_write_inode(struct inode *, struct writeback_control *);  void f2fs_evict_inode(struct inode *); @@ -1028,12 +1163,16 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t);  struct dnode_of_data;  struct node_info; +bool available_free_memory(struct f2fs_sb_info *, int);  int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +bool fsync_mark_done(struct f2fs_sb_info *, nid_t); +void fsync_mark_clear(struct f2fs_sb_info *, nid_t);  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);  int truncate_inode_blocks(struct inode *, pgoff_t);  int truncate_xattr_node(struct inode *, struct page *); -int remove_inode_page(struct inode *); +int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); +void remove_inode_page(struct inode *);  struct page *new_inode_page(struct inode *, const struct qstr *);  struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);  void ra_node_page(struct f2fs_sb_info *, nid_t); @@ -1046,6 +1185,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);  void alloc_nid_failed(struct f2fs_sb_info *, nid_t);  void recover_node_page(struct f2fs_sb_info *, struct page *,  		struct f2fs_summary *, struct node_info *, block_t); +bool recover_xattr_data(struct inode *, struct page *, block_t);  int recover_inode_page(struct f2fs_sb_info *, struct page *);  int restore_node_summary(struct f2fs_sb_info *, unsigned int,  				struct f2fs_summary_block *); @@ -1059,24 +1199,30 @@ void destroy_node_manager_caches(void);   * segment.c   */  void f2fs_balance_fs(struct f2fs_sb_info *); +void f2fs_balance_fs_bg(struct f2fs_sb_info *); +int f2fs_issue_flush(struct f2fs_sb_info *); +int create_flush_cmd_control(struct f2fs_sb_info *); +void destroy_flush_cmd_control(struct f2fs_sb_info *);  void invalidate_blocks(struct f2fs_sb_info *, block_t); +void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);  void clear_prefree_segments(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *);  int npages_for_summary_flush(struct f2fs_sb_info *);  void allocate_new_segments(struct f2fs_sb_info *);  struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); -struct bio *f2fs_bio_alloc(struct block_device *, int); -void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);  void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, -					block_t, block_t *); -void write_data_page(struct inode *, struct page *, struct dnode_of_data*, -					block_t, block_t *); -void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void write_node_page(struct f2fs_sb_info *, struct page *, +		struct f2fs_io_info *, unsigned int, block_t, block_t *); +void write_data_page(struct page *, struct dnode_of_data *, block_t *, +					struct f2fs_io_info *); +void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);  void recover_data_page(struct f2fs_sb_info *, struct page *,  				struct f2fs_summary *, block_t, block_t);  void rewrite_node_page(struct f2fs_sb_info *, struct page *,  				struct f2fs_summary *, block_t, block_t); +void allocate_data_block(struct f2fs_sb_info *, struct page *, +		block_t, block_t *, struct f2fs_summary *, int); +void f2fs_wait_on_page_writeback(struct page *, enum page_type);  void write_data_summaries(struct f2fs_sb_info *, block_t);  void write_node_summaries(struct f2fs_sb_info *, block_t);  int lookup_journal_in_cursum(struct f2fs_summary_block *, @@ -1084,23 +1230,25 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *,  void flush_sit_entries(struct f2fs_sb_info *);  int build_segment_manager(struct f2fs_sb_info *);  void destroy_segment_manager(struct f2fs_sb_info *); +int __init create_segment_manager_caches(void); +void destroy_segment_manager_caches(void);  /*   * checkpoint.c   */  struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);  struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +int ra_meta_pages(struct f2fs_sb_info *, int, int, int);  long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);  int acquire_orphan_inode(struct f2fs_sb_info *);  void release_orphan_inode(struct f2fs_sb_info *);  void add_orphan_inode(struct f2fs_sb_info *, nid_t);  void remove_orphan_inode(struct f2fs_sb_info *, nid_t); -int recover_orphan_inodes(struct f2fs_sb_info *); +void recover_orphan_inodes(struct f2fs_sb_info *);  int get_valid_checkpoint(struct f2fs_sb_info *);  void set_dirty_dir_page(struct inode *, struct page *);  void add_dirty_dir_inode(struct inode *);  void remove_dirty_dir_inode(struct inode *); -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t);  void sync_dirty_dir_inodes(struct f2fs_sb_info *);  void write_checkpoint(struct f2fs_sb_info *, bool);  void init_orphan_info(struct f2fs_sb_info *); @@ -1110,13 +1258,18 @@ void destroy_checkpoint_caches(void);  /*   * data.c   */ +void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); +int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); +void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, +						struct f2fs_io_info *);  int reserve_new_block(struct dnode_of_data *); +int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);  void update_extent_cache(block_t, struct dnode_of_data *);  struct page *find_data_page(struct inode *, pgoff_t, bool);  struct page *get_lock_data_page(struct inode *, pgoff_t);  struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); -int do_write_data_page(struct page *); +int do_write_data_page(struct page *, struct f2fs_io_info *); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);  /*   * gc.c @@ -1149,13 +1302,13 @@ struct f2fs_stat_info {  	int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;  	int nats, sits, fnids;  	int total_count, utilization; -	int bg_gc; +	int bg_gc, inline_inode;  	unsigned int valid_count, valid_node_count, valid_inode_count;  	unsigned int bimodal, avg_vblocks;  	int util_free, util_valid, util_invalid;  	int rsvd_segs, overp_segs;  	int dirty_count, node_pages, meta_pages; -	int prefree_count, call_count; +	int prefree_count, call_count, cp_count;  	int tot_segs, node_segs, data_segs, free_segs, free_secs;  	int tot_blks, data_blks, node_blks;  	int curseg[NR_CURSEG_TYPE]; @@ -1169,10 +1322,31 @@ struct f2fs_stat_info {  static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)  { -	return (struct f2fs_stat_info*)sbi->stat_info; +	return (struct f2fs_stat_info *)sbi->stat_info;  } -#define stat_inc_call_count(si)	((si)->call_count++) +#define stat_inc_cp_count(si)		((si)->cp_count++) +#define stat_inc_call_count(si)		((si)->call_count++) +#define stat_inc_bggc_count(sbi)	((sbi)->bg_gc++) +#define stat_inc_dirty_dir(sbi)		((sbi)->n_dirty_dirs++) +#define stat_dec_dirty_dir(sbi)		((sbi)->n_dirty_dirs--) +#define stat_inc_total_hit(sb)		((F2FS_SB(sb))->total_hit_ext++) +#define stat_inc_read_hit(sb)		((F2FS_SB(sb))->read_hit_ext++) +#define stat_inc_inline_inode(inode)					\ +	do {								\ +		if (f2fs_has_inline_data(inode))			\ +			((F2FS_SB(inode->i_sb))->inline_inode++);	\ +	} while (0) +#define stat_dec_inline_inode(inode)					\ +	do {								\ +		if (f2fs_has_inline_data(inode))			\ +			((F2FS_SB(inode->i_sb))->inline_inode--);	\ +	} while (0) + +#define stat_inc_seg_type(sbi, curseg)					\ +		((sbi)->segment_count[(curseg)->alloc_type]++) +#define stat_inc_block_count(sbi, curseg)				\ +		((sbi)->block_count[(curseg)->alloc_type]++)  #define stat_inc_seg_count(sbi, type)					\  	do {								\ @@ -1206,7 +1380,17 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);  void __init f2fs_create_root_stats(void);  void f2fs_destroy_root_stats(void);  #else +#define stat_inc_cp_count(si)  #define stat_inc_call_count(si) +#define stat_inc_bggc_count(si) +#define stat_inc_dirty_dir(sbi) +#define stat_dec_dirty_dir(sbi) +#define stat_inc_total_hit(sb) +#define stat_inc_read_hit(sb) +#define stat_inc_inline_inode(inode) +#define stat_dec_inline_inode(inode) +#define stat_inc_seg_type(sbi, curseg) +#define stat_inc_block_count(sbi, curseg)  #define stat_inc_seg_count(si, type)  #define stat_inc_tot_blk_count(si, blks)  #define stat_inc_data_blk_count(si, blks) @@ -1227,4 +1411,14 @@ extern const struct address_space_operations f2fs_meta_aops;  extern const struct inode_operations f2fs_dir_inode_operations;  extern const struct inode_operations f2fs_symlink_inode_operations;  extern const struct inode_operations f2fs_special_inode_operations; + +/* + * inline.c + */ +bool f2fs_may_inline(struct inode *); +int f2fs_read_inline_data(struct inode *, struct page *); +int f2fs_convert_inline_data(struct inode *, pgoff_t); +int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); +void truncate_inline_data(struct inode *, u64); +int recover_inline_data(struct inode *, struct page *);  #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 02c906971cc..7d8b9627509 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -19,6 +19,7 @@  #include <linux/compat.h>  #include <linux/uaccess.h>  #include <linux/mount.h> +#include <linux/pagevec.h>  #include "f2fs.h"  #include "node.h" @@ -33,41 +34,26 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,  	struct page *page = vmf->page;  	struct inode *inode = file_inode(vma->vm_file);  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	block_t old_blk_addr;  	struct dnode_of_data dn; -	int err, ilock; +	int err;  	f2fs_balance_fs(sbi);  	sb_start_pagefault(inode->i_sb);  	/* block allocation */ -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	set_new_dnode(&dn, inode, NULL, NULL, 0); -	err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); -	if (err) { -		mutex_unlock_op(sbi, ilock); +	err = f2fs_reserve_block(&dn, page->index); +	f2fs_unlock_op(sbi); +	if (err)  		goto out; -	} - -	old_blk_addr = dn.data_blkaddr; - -	if (old_blk_addr == NULL_ADDR) { -		err = reserve_new_block(&dn); -		if (err) { -			f2fs_put_dnode(&dn); -			mutex_unlock_op(sbi, ilock); -			goto out; -		} -	} -	f2fs_put_dnode(&dn); -	mutex_unlock_op(sbi, ilock);  	file_update_time(vma->vm_file);  	lock_page(page); -	if (page->mapping != inode->i_mapping || +	if (unlikely(page->mapping != inode->i_mapping ||  			page_offset(page) > i_size_read(inode) || -			!PageUptodate(page)) { +			!PageUptodate(page))) {  		unlock_page(page);  		err = -EFAULT;  		goto out; @@ -88,9 +74,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,  	set_page_dirty(page);  	SetPageUptodate(page); +	trace_f2fs_vm_page_mkwrite(page, DATA);  mapped:  	/* fill the page */ -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, DATA);  out:  	sb_end_pagefault(inode->i_sb);  	return block_page_mkwrite_return(err); @@ -98,6 +85,7 @@ out:  static const struct vm_operations_struct f2fs_file_vm_ops = {  	.fault		= filemap_fault, +	.map_pages	= filemap_map_pages,  	.page_mkwrite	= f2fs_vm_page_mkwrite,  	.remap_pages	= generic_file_remap_pages,  }; @@ -125,6 +113,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  {  	struct inode *inode = file->f_mapping->host; +	struct f2fs_inode_info *fi = F2FS_I(inode);  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	int ret = 0;  	bool need_cp = false; @@ -134,7 +123,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  		.for_reclaim = 0,  	}; -	if (f2fs_readonly(inode->i_sb)) +	if (unlikely(f2fs_readonly(inode->i_sb)))  		return 0;  	trace_f2fs_sync_file_enter(inode); @@ -147,7 +136,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	/* guarantee free sections for fsync */  	f2fs_balance_fs(sbi); -	mutex_lock(&inode->i_mutex); +	down_read(&fi->i_sem);  	/*  	 * Both of fdatasync() and fsync() are able to be recovered from @@ -164,40 +153,174 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))  		need_cp = true; +	up_read(&fi->i_sem); +  	if (need_cp) {  		nid_t pino; -		F2FS_I(inode)->xattr_ver = 0; -  		/* all the dirty node pages should be flushed for POR */  		ret = f2fs_sync_fs(inode->i_sb, 1); + +		down_write(&fi->i_sem); +		F2FS_I(inode)->xattr_ver = 0;  		if (file_wrong_pino(inode) && inode->i_nlink == 1 &&  					get_parent_ino(inode, &pino)) {  			F2FS_I(inode)->i_pino = pino;  			file_got_pino(inode); +			up_write(&fi->i_sem);  			mark_inode_dirty_sync(inode);  			ret = f2fs_write_inode(inode, NULL);  			if (ret)  				goto out; +		} else { +			up_write(&fi->i_sem);  		}  	} else {  		/* if there is no written node page, write its inode page */  		while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { +			if (fsync_mark_done(sbi, inode->i_ino)) +				goto out;  			mark_inode_dirty_sync(inode);  			ret = f2fs_write_inode(inode, NULL);  			if (ret)  				goto out;  		} -		filemap_fdatawait_range(sbi->node_inode->i_mapping, -							0, LONG_MAX); -		ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); +		ret = wait_on_node_pages_writeback(sbi, inode->i_ino); +		if (ret) +			goto out; +		ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));  	}  out: -	mutex_unlock(&inode->i_mutex);  	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);  	return ret;  } +static pgoff_t __get_first_dirty_index(struct address_space *mapping, +						pgoff_t pgofs, int whence) +{ +	struct pagevec pvec; +	int nr_pages; + +	if (whence != SEEK_DATA) +		return 0; + +	/* find first dirty page index */ +	pagevec_init(&pvec, 0); +	nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); +	pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; +	pagevec_release(&pvec); +	return pgofs; +} + +static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, +							int whence) +{ +	switch (whence) { +	case SEEK_DATA: +		if ((blkaddr == NEW_ADDR && dirty == pgofs) || +			(blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) +			return true; +		break; +	case SEEK_HOLE: +		if (blkaddr == NULL_ADDR) +			return true; +		break; +	} +	return false; +} + +static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) +{ +	struct inode *inode = file->f_mapping->host; +	loff_t maxbytes = inode->i_sb->s_maxbytes; +	struct dnode_of_data dn; +	pgoff_t pgofs, end_offset, dirty; +	loff_t data_ofs = offset; +	loff_t isize; +	int err = 0; + +	mutex_lock(&inode->i_mutex); + +	isize = i_size_read(inode); +	if (offset >= isize) +		goto fail; + +	/* handle inline data case */ +	if (f2fs_has_inline_data(inode)) { +		if (whence == SEEK_HOLE) +			data_ofs = isize; +		goto found; +	} + +	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + +	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); + +	for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { +		set_new_dnode(&dn, inode, NULL, NULL, 0); +		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); +		if (err && err != -ENOENT) { +			goto fail; +		} else if (err == -ENOENT) { +			/* direct node is not exist */ +			if (whence == SEEK_DATA) { +				pgofs = PGOFS_OF_NEXT_DNODE(pgofs, +							F2FS_I(inode)); +				continue; +			} else { +				goto found; +			} +		} + +		end_offset = IS_INODE(dn.node_page) ? +			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + +		/* find data/hole in dnode block */ +		for (; dn.ofs_in_node < end_offset; +				dn.ofs_in_node++, pgofs++, +				data_ofs = pgofs << PAGE_CACHE_SHIFT) { +			block_t blkaddr; +			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + +			if (__found_offset(blkaddr, dirty, pgofs, whence)) { +				f2fs_put_dnode(&dn); +				goto found; +			} +		} +		f2fs_put_dnode(&dn); +	} + +	if (whence == SEEK_DATA) +		goto fail; +found: +	if (whence == SEEK_HOLE && data_ofs > isize) +		data_ofs = isize; +	mutex_unlock(&inode->i_mutex); +	return vfs_setpos(file, data_ofs, maxbytes); +fail: +	mutex_unlock(&inode->i_mutex); +	return -ENXIO; +} + +static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) +{ +	struct inode *inode = file->f_mapping->host; +	loff_t maxbytes = inode->i_sb->s_maxbytes; + +	switch (whence) { +	case SEEK_SET: +	case SEEK_CUR: +	case SEEK_END: +		return generic_file_llseek_size(file, offset, whence, +						maxbytes, i_size_read(inode)); +	case SEEK_DATA: +	case SEEK_HOLE: +		return f2fs_seek_block(file, offset, whence); +	} + +	return -EINVAL; +} +  static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)  {  	file_accessed(file); @@ -215,7 +338,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)  	raw_node = F2FS_NODE(dn->node_page);  	addr = blkaddr_in_node(raw_node) + ofs; -	for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { +	for (; count > 0; count--, addr++, dn->ofs_in_node++) {  		block_t blkaddr = le32_to_cpu(*addr);  		if (blkaddr == NULL_ADDR)  			continue; @@ -246,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)  	unsigned offset = from & (PAGE_CACHE_SIZE - 1);  	struct page *page; +	if (f2fs_has_inline_data(inode)) +		return truncate_inline_data(inode, from); +  	if (!offset)  		return; @@ -254,48 +380,48 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)  		return;  	lock_page(page); -	if (page->mapping != inode->i_mapping) { +	if (unlikely(page->mapping != inode->i_mapping)) {  		f2fs_put_page(page, 1);  		return;  	} -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, DATA);  	zero_user(page, offset, PAGE_CACHE_SIZE - offset);  	set_page_dirty(page);  	f2fs_put_page(page, 1);  } -static int truncate_blocks(struct inode *inode, u64 from) +int truncate_blocks(struct inode *inode, u64 from)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	unsigned int blocksize = inode->i_sb->s_blocksize;  	struct dnode_of_data dn;  	pgoff_t free_from; -	int count = 0, ilock = -1; -	int err; +	int count = 0, err = 0;  	trace_f2fs_truncate_blocks_enter(inode, from); +	if (f2fs_has_inline_data(inode)) +		goto done; +  	free_from = (pgoff_t)  			((from + blocksize - 1) >> (sbi->log_blocksize)); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi); +  	set_new_dnode(&dn, inode, NULL, NULL, 0);  	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);  	if (err) {  		if (err == -ENOENT)  			goto free_next; -		mutex_unlock_op(sbi, ilock); +		f2fs_unlock_op(sbi);  		trace_f2fs_truncate_blocks_exit(inode, err);  		return err;  	} -	if (IS_INODE(dn.node_page)) -		count = ADDRS_PER_INODE(F2FS_I(inode)); -	else -		count = ADDRS_PER_BLOCK; +	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));  	count -= dn.ofs_in_node; -	BUG_ON(count < 0); +	f2fs_bug_on(count < 0);  	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {  		truncate_data_blocks_range(&dn, count); @@ -305,8 +431,8 @@ static int truncate_blocks(struct inode *inode, u64 from)  	f2fs_put_dnode(&dn);  free_next:  	err = truncate_inode_blocks(inode, free_from); -	mutex_unlock_op(sbi, ilock); - +	f2fs_unlock_op(sbi); +done:  	/* lastly zero out the first data page */  	truncate_partial_data_page(inode, from); @@ -380,6 +506,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  			attr->ia_size != i_size_read(inode)) { +		err = f2fs_convert_inline_data(inode, attr->ia_size); +		if (err) +			return err; +  		truncate_setsize(inode, attr->ia_size);  		f2fs_truncate(inode);  		f2fs_balance_fs(F2FS_SB(inode->i_sb)); @@ -388,7 +518,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)  	__setattr_copy(inode, attr);  	if (attr->ia_valid & ATTR_MODE) { -		err = f2fs_acl_chmod(inode); +		err = posix_acl_chmod(inode, get_inode_mode(inode));  		if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {  			inode->i_mode = fi->i_acl_mode;  			clear_inode_flag(fi, FI_ACL_MODE); @@ -403,12 +533,14 @@ const struct inode_operations f2fs_file_inode_operations = {  	.getattr	= f2fs_getattr,  	.setattr	= f2fs_setattr,  	.get_acl	= f2fs_get_acl, +	.set_acl	= f2fs_set_acl,  #ifdef CONFIG_F2FS_FS_XATTR  	.setxattr	= generic_setxattr,  	.getxattr	= generic_getxattr,  	.listxattr	= f2fs_listxattr,  	.removexattr	= generic_removexattr,  #endif +	.fiemap		= f2fs_fiemap,  };  static void fill_zero(struct inode *inode, pgoff_t index, @@ -416,19 +548,18 @@ static void fill_zero(struct inode *inode, pgoff_t index,  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct page *page; -	int ilock;  	if (!len)  		return;  	f2fs_balance_fs(sbi); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	page = get_new_data_page(inode, NULL, index, false); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (!IS_ERR(page)) { -		wait_on_page_writeback(page); +		f2fs_wait_on_page_writeback(page, DATA);  		zero_user(page, start, len);  		set_page_dirty(page);  		f2fs_put_page(page, 1); @@ -458,12 +589,16 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)  	return 0;  } -static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +static int punch_hole(struct inode *inode, loff_t offset, loff_t len)  {  	pgoff_t pg_start, pg_end;  	loff_t off_start, off_end;  	int ret = 0; +	ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); +	if (ret) +		return ret; +  	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;  	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -484,7 +619,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)  			struct address_space *mapping = inode->i_mapping;  			loff_t blk_start, blk_end;  			struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -			int ilock;  			f2fs_balance_fs(sbi); @@ -493,18 +627,12 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)  			truncate_inode_pages_range(mapping, blk_start,  					blk_end - 1); -			ilock = mutex_lock_op(sbi); +			f2fs_lock_op(sbi);  			ret = truncate_hole(inode, pg_start, pg_end); -			mutex_unlock_op(sbi, ilock); +			f2fs_unlock_op(sbi);  		}  	} -	if (!(mode & FALLOC_FL_KEEP_SIZE) && -		i_size_read(inode) <= (offset + len)) { -		i_size_write(inode, offset); -		mark_inode_dirty(inode); -	} -  	return ret;  } @@ -521,35 +649,29 @@ static int expand_inode_data(struct inode *inode, loff_t offset,  	if (ret)  		return ret; +	ret = f2fs_convert_inline_data(inode, offset + len); +	if (ret) +		return ret; +  	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;  	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;  	off_start = offset & (PAGE_CACHE_SIZE - 1);  	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); +	f2fs_lock_op(sbi); +  	for (index = pg_start; index <= pg_end; index++) {  		struct dnode_of_data dn; -		int ilock; -		ilock = mutex_lock_op(sbi); +		if (index == pg_end && !off_end) +			goto noalloc; +  		set_new_dnode(&dn, inode, NULL, NULL, 0); -		ret = get_dnode_of_data(&dn, index, ALLOC_NODE); -		if (ret) { -			mutex_unlock_op(sbi, ilock); +		ret = f2fs_reserve_block(&dn, index); +		if (ret)  			break; -		} - -		if (dn.data_blkaddr == NULL_ADDR) { -			ret = reserve_new_block(&dn); -			if (ret) { -				f2fs_put_dnode(&dn); -				mutex_unlock_op(sbi, ilock); -				break; -			} -		} -		f2fs_put_dnode(&dn); -		mutex_unlock_op(sbi, ilock); - +noalloc:  		if (pg_start == pg_end)  			new_size = offset + len;  		else if (index == pg_start && off_start) @@ -564,7 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,  		i_size_read(inode) < new_size) {  		i_size_write(inode, new_size);  		mark_inode_dirty(inode); +		update_inode_page(inode);  	} +	f2fs_unlock_op(sbi);  	return ret;  } @@ -578,8 +702,10 @@ static long f2fs_fallocate(struct file *file, int mode,  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))  		return -EOPNOTSUPP; +	mutex_lock(&inode->i_mutex); +  	if (mode & FALLOC_FL_PUNCH_HOLE) -		ret = punch_hole(inode, offset, len, mode); +		ret = punch_hole(inode, offset, len);  	else  		ret = expand_inode_data(inode, offset, len, mode); @@ -587,6 +713,9 @@ static long f2fs_fallocate(struct file *file, int mode,  		inode->i_mtime = inode->i_ctime = CURRENT_TIME;  		mark_inode_dirty(inode);  	} + +	mutex_unlock(&inode->i_mutex); +  	trace_f2fs_fallocate(inode, mode, offset, len, ret);  	return ret;  } @@ -682,11 +811,11 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)  #endif  const struct file_operations f2fs_file_operations = { -	.llseek		= generic_file_llseek, -	.read		= do_sync_read, -	.write		= do_sync_write, -	.aio_read	= generic_file_aio_read, -	.aio_write	= generic_file_aio_write, +	.llseek		= f2fs_llseek, +	.read		= new_sync_read, +	.write		= new_sync_write, +	.read_iter	= generic_file_read_iter, +	.write_iter	= generic_file_write_iter,  	.open		= generic_file_open,  	.mmap		= f2fs_file_mmap,  	.fsync		= f2fs_sync_file, @@ -696,5 +825,5 @@ const struct file_operations f2fs_file_operations = {  	.compat_ioctl	= f2fs_compat_ioctl,  #endif  	.splice_read	= generic_file_splice_read, -	.splice_write	= generic_file_splice_write, +	.splice_write	= iter_file_splice_write,  }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2f157e88368..b90dbe55403 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -77,13 +77,15 @@ static int gc_thread_func(void *data)  		else  			wait_ms = increase_sleep_time(gc_th, wait_ms); -#ifdef CONFIG_F2FS_STAT_FS -		sbi->bg_gc++; -#endif +		stat_inc_bggc_count(sbi);  		/* if return value is not zero, no victim was selected */  		if (f2fs_gc(sbi))  			wait_ms = gc_th->no_gc_sleep_time; + +		/* balancing f2fs's metadata periodically */ +		f2fs_balance_fs_bg(sbi); +  	} while (!kthread_should_stop());  	return 0;  } @@ -117,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)  		kfree(gc_th);  		sbi->gc_thread = NULL;  	} -  out:  	return err;  } @@ -162,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,  		p->ofs_unit = sbi->segs_per_sec;  	} -	if (p->max_search > MAX_VICTIM_SEARCH) -		p->max_search = MAX_VICTIM_SEARCH; +	if (p->max_search > sbi->max_victim_search) +		p->max_search = sbi->max_victim_search;  	p->offset = sbi->last_victim[p->gc_mode];  } @@ -236,8 +237,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)  	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));  } -static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, -					struct victim_sel_policy *p) +static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, +			unsigned int segno, struct victim_sel_policy *p)  {  	if (p->alloc_mode == SSR)  		return get_seg_entry(sbi, segno)->ckpt_valid_blocks; @@ -293,7 +294,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,  			}  			break;  		} -		p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + +		p.offset = segno + p.ofs_unit; +		if (p.ofs_unit > 1) +			p.offset -= segno % p.ofs_unit; +  		secno = GET_SECNO(sbi, segno);  		if (sec_usage_check(sbi, secno)) @@ -306,10 +311,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,  		if (p.min_cost > cost) {  			p.min_segno = segno;  			p.min_cost = cost; -		} - -		if (cost == max_cost) +		} else if (unlikely(cost == max_cost)) {  			continue; +		}  		if (nsearched++ >= p.max_search) {  			sbi->last_victim[p.gc_mode] = segno; @@ -358,12 +362,8 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist)  		iput(inode);  		return;  	} -repeat: -	new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); -	if (!new_ie) { -		cond_resched(); -		goto repeat; -	} + +	new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);  	new_ie->inode = inode;  	list_add_tail(&new_ie->list, ilist);  } @@ -428,7 +428,7 @@ next_step:  		/* set page dirty and write it */  		if (gc_type == FG_GC) { -			f2fs_wait_on_page_writeback(node_page, NODE, true); +			f2fs_wait_on_page_writeback(node_page, NODE);  			set_page_dirty(node_page);  		} else {  			if (!PageWriteback(node_page)) @@ -520,23 +520,23 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,  static void move_data_page(struct inode *inode, struct page *page, int gc_type)  { +	struct f2fs_io_info fio = { +		.type = DATA, +		.rw = WRITE_SYNC, +	}; +  	if (gc_type == BG_GC) {  		if (PageWriteback(page))  			goto out;  		set_page_dirty(page);  		set_cold_data(page);  	} else { -		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - -		f2fs_wait_on_page_writeback(page, DATA, true); +		f2fs_wait_on_page_writeback(page, DATA); -		if (clear_page_dirty_for_io(page) && -			S_ISDIR(inode->i_mode)) { -			dec_page_count(sbi, F2FS_DIRTY_DENTS); +		if (clear_page_dirty_for_io(page))  			inode_dec_dirty_dents(inode); -		}  		set_cold_data(page); -		do_write_data_page(page); +		do_write_data_page(page, &fio);  		clear_cold_data(page);  	}  out: @@ -630,7 +630,7 @@ next_iput:  		goto next_step;  	if (gc_type == FG_GC) { -		f2fs_submit_bio(sbi, DATA, true); +		f2fs_submit_merged_bio(sbi, DATA, WRITE);  		/*  		 * In the case of FG_GC, it'd be better to reclaim this victim @@ -663,8 +663,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,  	/* read segment summary of victim */  	sum_page = get_sum_page(sbi, segno); -	if (IS_ERR(sum_page)) -		return;  	blk_start_plug(&plug); @@ -696,7 +694,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi)  	INIT_LIST_HEAD(&ilist);  gc_more: -	if (!(sbi->sb->s_flags & MS_ACTIVE)) +	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) +		goto stop; +	if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))  		goto stop;  	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { @@ -708,6 +708,11 @@ gc_more:  		goto stop;  	ret = 0; +	/* readahead multi ssa blocks those have contiguous address */ +	if (sbi->segs_per_sec > 1) +		ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec, +								META_SSA); +  	for (i = 0; i < sbi->segs_per_sec; i++)  		do_garbage_collect(sbi, segno + i, &ilist, gc_type); @@ -737,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)  int __init create_gc_caches(void)  {  	winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", -			sizeof(struct inode_entry), NULL); +			sizeof(struct inode_entry));  	if (!winode_slab)  		return -ENOMEM;  	return 0; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 507056d2220..5d5eb6047bf 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -20,7 +20,7 @@  #define LIMIT_FREE_BLOCK	40 /* percentage over invalid + free space */  /* Search max. number of dirty segments to select a victim segment */ -#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ +#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */  struct f2fs_gc_kthread {  	struct task_struct *f2fs_gc_task; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c new file mode 100644 index 00000000000..1bba5228c19 --- /dev/null +++ b/fs/f2fs/inline.c @@ -0,0 +1,250 @@ +/* + * fs/f2fs/inline.c + * Copyright (c) 2013, Intel Corporation + * Authors: Huajun Li <huajun.li@intel.com> + *          Haicheng Li <haicheng.li@intel.com> + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/f2fs_fs.h> + +#include "f2fs.h" + +bool f2fs_may_inline(struct inode *inode) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	block_t nr_blocks; +	loff_t i_size; + +	if (!test_opt(sbi, INLINE_DATA)) +		return false; + +	nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; +	if (inode->i_blocks > nr_blocks) +		return false; + +	i_size = i_size_read(inode); +	if (i_size > MAX_INLINE_DATA) +		return false; + +	return true; +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	struct page *ipage; +	void *src_addr, *dst_addr; + +	if (page->index) { +		zero_user_segment(page, 0, PAGE_CACHE_SIZE); +		goto out; +	} + +	ipage = get_node_page(sbi, inode->i_ino); +	if (IS_ERR(ipage)) { +		unlock_page(page); +		return PTR_ERR(ipage); +	} + +	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + +	/* Copy the whole inline data block */ +	src_addr = inline_data_addr(ipage); +	dst_addr = kmap(page); +	memcpy(dst_addr, src_addr, MAX_INLINE_DATA); +	kunmap(page); +	f2fs_put_page(ipage, 1); + +out: +	SetPageUptodate(page); +	unlock_page(page); + +	return 0; +} + +static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +{ +	int err; +	struct page *ipage; +	struct dnode_of_data dn; +	void *src_addr, *dst_addr; +	block_t new_blk_addr; +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	struct f2fs_io_info fio = { +		.type = DATA, +		.rw = WRITE_SYNC | REQ_PRIO, +	}; + +	f2fs_lock_op(sbi); +	ipage = get_node_page(sbi, inode->i_ino); +	if (IS_ERR(ipage)) { +		err = PTR_ERR(ipage); +		goto out; +	} + +	/* +	 * i_addr[0] is not used for inline data, +	 * so reserving new block will not destroy inline data +	 */ +	set_new_dnode(&dn, inode, ipage, NULL, 0); +	err = f2fs_reserve_block(&dn, 0); +	if (err) +		goto out; + +	f2fs_wait_on_page_writeback(page, DATA); +	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + +	/* Copy the whole inline data block */ +	src_addr = inline_data_addr(ipage); +	dst_addr = kmap(page); +	memcpy(dst_addr, src_addr, MAX_INLINE_DATA); +	kunmap(page); +	SetPageUptodate(page); + +	/* write data page to try to make data consistent */ +	set_page_writeback(page); +	write_data_page(page, &dn, &new_blk_addr, &fio); +	update_extent_cache(new_blk_addr, &dn); +	f2fs_wait_on_page_writeback(page, DATA); + +	/* clear inline data and flag after data writeback */ +	zero_user_segment(ipage, INLINE_DATA_OFFSET, +				 INLINE_DATA_OFFSET + MAX_INLINE_DATA); +	clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); +	stat_dec_inline_inode(inode); + +	sync_inode_page(&dn); +	f2fs_put_dnode(&dn); +out: +	f2fs_unlock_op(sbi); +	return err; +} + +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) +{ +	struct page *page; +	int err; + +	if (!f2fs_has_inline_data(inode)) +		return 0; +	else if (to_size <= MAX_INLINE_DATA) +		return 0; + +	page = grab_cache_page(inode->i_mapping, 0); +	if (!page) +		return -ENOMEM; + +	err = __f2fs_convert_inline_data(inode, page); +	f2fs_put_page(page, 1); +	return err; +} + +int f2fs_write_inline_data(struct inode *inode, +			   struct page *page, unsigned size) +{ +	void *src_addr, *dst_addr; +	struct page *ipage; +	struct dnode_of_data dn; +	int err; + +	set_new_dnode(&dn, inode, NULL, NULL, 0); +	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); +	if (err) +		return err; +	ipage = dn.inode_page; + +	f2fs_wait_on_page_writeback(ipage, NODE); +	zero_user_segment(ipage, INLINE_DATA_OFFSET, +				 INLINE_DATA_OFFSET + MAX_INLINE_DATA); +	src_addr = kmap(page); +	dst_addr = inline_data_addr(ipage); +	memcpy(dst_addr, src_addr, size); +	kunmap(page); + +	/* Release the first data block if it is allocated */ +	if (!f2fs_has_inline_data(inode)) { +		truncate_data_blocks_range(&dn, 1); +		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); +		stat_inc_inline_inode(inode); +	} + +	sync_inode_page(&dn); +	f2fs_put_dnode(&dn); + +	return 0; +} + +void truncate_inline_data(struct inode *inode, u64 from) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	struct page *ipage; + +	if (from >= MAX_INLINE_DATA) +		return; + +	ipage = get_node_page(sbi, inode->i_ino); +	if (IS_ERR(ipage)) +		return; + +	f2fs_wait_on_page_writeback(ipage, NODE); + +	zero_user_segment(ipage, INLINE_DATA_OFFSET + from, +				INLINE_DATA_OFFSET + MAX_INLINE_DATA); +	set_page_dirty(ipage); +	f2fs_put_page(ipage, 1); +} + +int recover_inline_data(struct inode *inode, struct page *npage) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	struct f2fs_inode *ri = NULL; +	void *src_addr, *dst_addr; +	struct page *ipage; + +	/* +	 * The inline_data recovery policy is as follows. +	 * [prev.] [next] of inline_data flag +	 *    o       o  -> recover inline_data +	 *    o       x  -> remove inline_data, and then recover data blocks +	 *    x       o  -> remove inline_data, and then recover inline_data +	 *    x       x  -> recover data blocks +	 */ +	if (IS_INODE(npage)) +		ri = F2FS_INODE(npage); + +	if (f2fs_has_inline_data(inode) && +			ri && ri->i_inline & F2FS_INLINE_DATA) { +process_inline: +		ipage = get_node_page(sbi, inode->i_ino); +		f2fs_bug_on(IS_ERR(ipage)); + +		f2fs_wait_on_page_writeback(ipage, NODE); + +		src_addr = inline_data_addr(npage); +		dst_addr = inline_data_addr(ipage); +		memcpy(dst_addr, src_addr, MAX_INLINE_DATA); +		update_inode(inode, ipage); +		f2fs_put_page(ipage, 1); +		return -1; +	} + +	if (f2fs_has_inline_data(inode)) { +		ipage = get_node_page(sbi, inode->i_ino); +		f2fs_bug_on(IS_ERR(ipage)); +		f2fs_wait_on_page_writeback(ipage, NODE); +		zero_user_segment(ipage, INLINE_DATA_OFFSET, +				 INLINE_DATA_OFFSET + MAX_INLINE_DATA); +		clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); +		update_inode(inode, ipage); +		f2fs_put_page(ipage, 1); +	} else if (ri && ri->i_inline & F2FS_INLINE_DATA) { +		truncate_blocks(inode, 0); +		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); +		goto process_inline; +	} +	return 0; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9339cd29204..2cf6962f6cc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,6 +12,7 @@  #include <linux/f2fs_fs.h>  #include <linux/buffer_head.h>  #include <linux/writeback.h> +#include <linux/bitops.h>  #include "f2fs.h"  #include "node.h" @@ -21,20 +22,49 @@  void f2fs_set_inode_flags(struct inode *inode)  {  	unsigned int flags = F2FS_I(inode)->i_flags; - -	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | -			S_NOATIME | S_DIRSYNC); +	unsigned int new_fl = 0;  	if (flags & FS_SYNC_FL) -		inode->i_flags |= S_SYNC; +		new_fl |= S_SYNC;  	if (flags & FS_APPEND_FL) -		inode->i_flags |= S_APPEND; +		new_fl |= S_APPEND;  	if (flags & FS_IMMUTABLE_FL) -		inode->i_flags |= S_IMMUTABLE; +		new_fl |= S_IMMUTABLE;  	if (flags & FS_NOATIME_FL) -		inode->i_flags |= S_NOATIME; +		new_fl |= S_NOATIME;  	if (flags & FS_DIRSYNC_FL) -		inode->i_flags |= S_DIRSYNC; +		new_fl |= S_DIRSYNC; +	set_mask_bits(&inode->i_flags, +			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); +} + +static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +{ +	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || +			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { +		if (ri->i_addr[0]) +			inode->i_rdev = +				old_decode_dev(le32_to_cpu(ri->i_addr[0])); +		else +			inode->i_rdev = +				new_decode_dev(le32_to_cpu(ri->i_addr[1])); +	} +} + +static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +{ +	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { +		if (old_valid_dev(inode->i_rdev)) { +			ri->i_addr[0] = +				cpu_to_le32(old_encode_dev(inode->i_rdev)); +			ri->i_addr[1] = 0; +		} else { +			ri->i_addr[0] = 0; +			ri->i_addr[1] = +				cpu_to_le32(new_encode_dev(inode->i_rdev)); +			ri->i_addr[2] = 0; +		} +	}  }  static int do_read_inode(struct inode *inode) @@ -42,13 +72,13 @@ static int do_read_inode(struct inode *inode)  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct f2fs_inode_info *fi = F2FS_I(inode);  	struct page *node_page; -	struct f2fs_node *rn;  	struct f2fs_inode *ri;  	/* Check if ino is within scope */  	if (check_nid_range(sbi, inode->i_ino)) {  		f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",  			 (unsigned long) inode->i_ino); +		WARN_ON(1);  		return -EINVAL;  	} @@ -56,8 +86,7 @@ static int do_read_inode(struct inode *inode)  	if (IS_ERR(node_page))  		return PTR_ERR(node_page); -	rn = F2FS_NODE(node_page); -	ri = &(rn->i); +	ri = F2FS_INODE(node_page);  	inode->i_mode = le16_to_cpu(ri->i_mode);  	i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -73,10 +102,6 @@ static int do_read_inode(struct inode *inode)  	inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);  	inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);  	inode->i_generation = le32_to_cpu(ri->i_generation); -	if (ri->i_addr[0]) -		inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); -	else -		inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));  	fi->i_current_depth = le32_to_cpu(ri->i_current_depth);  	fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); @@ -84,8 +109,14 @@ static int do_read_inode(struct inode *inode)  	fi->flags = 0;  	fi->i_advise = ri->i_advise;  	fi->i_pino = le32_to_cpu(ri->i_pino); +	fi->i_dir_level = ri->i_dir_level; +  	get_extent_info(&fi->ext, ri->i_ext);  	get_inline_info(fi, ri); + +	/* get rdev by using inline_info */ +	__get_inode_rdev(inode, ri); +  	f2fs_put_page(node_page, 1);  	return 0;  } @@ -149,13 +180,11 @@ bad_inode:  void update_inode(struct inode *inode, struct page *node_page)  { -	struct f2fs_node *rn;  	struct f2fs_inode *ri; -	f2fs_wait_on_page_writeback(node_page, NODE, false); +	f2fs_wait_on_page_writeback(node_page, NODE); -	rn = F2FS_NODE(node_page); -	ri = &(rn->i); +	ri = F2FS_INODE(node_page);  	ri->i_mode = cpu_to_le16(inode->i_mode);  	ri->i_advise = F2FS_I(inode)->i_advise; @@ -178,43 +207,38 @@ void update_inode(struct inode *inode, struct page *node_page)  	ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);  	ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);  	ri->i_generation = cpu_to_le32(inode->i_generation); +	ri->i_dir_level = F2FS_I(inode)->i_dir_level; -	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { -		if (old_valid_dev(inode->i_rdev)) { -			ri->i_addr[0] = -				cpu_to_le32(old_encode_dev(inode->i_rdev)); -			ri->i_addr[1] = 0; -		} else { -			ri->i_addr[0] = 0; -			ri->i_addr[1] = -				cpu_to_le32(new_encode_dev(inode->i_rdev)); -			ri->i_addr[2] = 0; -		} -	} - +	__set_inode_rdev(inode, ri);  	set_cold_node(inode, node_page);  	set_page_dirty(node_page); +  	clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);  } -int update_inode_page(struct inode *inode) +void update_inode_page(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct page *node_page; - +retry:  	node_page = get_node_page(sbi, inode->i_ino); -	if (IS_ERR(node_page)) -		return PTR_ERR(node_page); - +	if (IS_ERR(node_page)) { +		int err = PTR_ERR(node_page); +		if (err == -ENOMEM) { +			cond_resched(); +			goto retry; +		} else if (err != -ENOENT) { +			f2fs_stop_checkpoint(sbi); +		} +		return; +	}  	update_inode(inode, node_page);  	f2fs_put_page(node_page, 1); -	return 0;  }  int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	int ret, ilock;  	if (inode->i_ino == F2FS_NODE_INO(sbi) ||  			inode->i_ino == F2FS_META_INO(sbi)) @@ -227,14 +251,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)  	 * We need to lock here to prevent from producing dirty node pages  	 * during the urgent cleaning time when runing out of free sections.  	 */ -	ilock = mutex_lock_op(sbi); -	ret = update_inode_page(inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_lock_op(sbi); +	update_inode_page(inode); +	f2fs_unlock_op(sbi);  	if (wbc)  		f2fs_balance_fs(sbi); -	return ret; +	return 0;  }  /* @@ -243,16 +267,15 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)  void f2fs_evict_inode(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	int ilock;  	trace_f2fs_evict_inode(inode); -	truncate_inode_pages(&inode->i_data, 0); +	truncate_inode_pages_final(&inode->i_data);  	if (inode->i_ino == F2FS_NODE_INO(sbi) ||  			inode->i_ino == F2FS_META_INO(sbi))  		goto no_delete; -	BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); +	f2fs_bug_on(get_dirty_dents(inode));  	remove_dirty_dir_inode(inode);  	if (inode->i_nlink || is_bad_inode(inode)) @@ -265,11 +288,13 @@ void f2fs_evict_inode(struct inode *inode)  	if (F2FS_HAS_BLOCKS(inode))  		f2fs_truncate(inode); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	remove_inode_page(inode); -	mutex_unlock_op(sbi, ilock); +	stat_dec_inline_inode(inode); +	f2fs_unlock_op(sbi);  	sb_end_intwrite(inode->i_sb);  no_delete:  	clear_inode(inode); +	invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);  } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2a5359c990f..a6bdddc33ce 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -27,32 +27,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)  	nid_t ino;  	struct inode *inode;  	bool nid_free = false; -	int err, ilock; +	int err;  	inode = new_inode(sb);  	if (!inode)  		return ERR_PTR(-ENOMEM); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	if (!alloc_nid(sbi, &ino)) { -		mutex_unlock_op(sbi, ilock); +		f2fs_unlock_op(sbi);  		err = -ENOSPC;  		goto fail;  	} -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi); -	inode->i_uid = current_fsuid(); - -	if (dir->i_mode & S_ISGID) { -		inode->i_gid = dir->i_gid; -		if (S_ISDIR(mode)) -			mode |= S_ISGID; -	} else { -		inode->i_gid = current_fsgid(); -	} +	inode_init_owner(inode, dir, mode);  	inode->i_ino = ino; -	inode->i_mode = mode;  	inode->i_blocks = 0;  	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;  	inode->i_generation = sbi->s_next_generation++; @@ -115,7 +106,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,  	struct f2fs_sb_info *sbi = F2FS_SB(sb);  	struct inode *inode;  	nid_t ino = 0; -	int err, ilock; +	int err;  	f2fs_balance_fs(sbi); @@ -131,9 +122,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,  	inode->i_mapping->a_ops = &f2fs_dblock_aops;  	ino = inode->i_ino; -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	err = f2fs_add_link(dentry, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (err)  		goto out; @@ -157,7 +148,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,  	struct inode *inode = old_dentry->d_inode;  	struct super_block *sb = dir->i_sb;  	struct f2fs_sb_info *sbi = F2FS_SB(sb); -	int err, ilock; +	int err;  	f2fs_balance_fs(sbi); @@ -165,9 +156,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,  	ihold(inode);  	set_inode_flag(F2FS_I(inode), FI_INC_LINK); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	err = f2fs_add_link(dentry, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (err)  		goto out; @@ -207,6 +198,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,  		inode = f2fs_iget(dir->i_sb, ino);  		if (IS_ERR(inode))  			return ERR_CAST(inode); + +		stat_inc_inline_inode(inode);  	}  	return d_splice_alias(inode, dentry); @@ -220,7 +213,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)  	struct f2fs_dir_entry *de;  	struct page *page;  	int err = -ENOENT; -	int ilock;  	trace_f2fs_unlink_enter(dir, dentry);  	f2fs_balance_fs(sbi); @@ -229,16 +221,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)  	if (!de)  		goto fail; +	f2fs_lock_op(sbi);  	err = acquire_orphan_inode(sbi);  	if (err) { +		f2fs_unlock_op(sbi);  		kunmap(page);  		f2fs_put_page(page, 0);  		goto fail;  	} - -	ilock = mutex_lock_op(sbi);  	f2fs_delete_entry(de, page, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	/* In order to evict this inode,  we set it dirty */  	mark_inode_dirty(inode); @@ -254,7 +246,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,  	struct f2fs_sb_info *sbi = F2FS_SB(sb);  	struct inode *inode;  	size_t symlen = strlen(symname) + 1; -	int err, ilock; +	int err;  	f2fs_balance_fs(sbi); @@ -265,9 +257,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,  	inode->i_op = &f2fs_symlink_inode_operations;  	inode->i_mapping->a_ops = &f2fs_dblock_aops; -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	err = f2fs_add_link(dentry, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (err)  		goto out; @@ -290,7 +282,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);  	struct inode *inode; -	int err, ilock; +	int err;  	f2fs_balance_fs(sbi); @@ -304,9 +296,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)  	mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);  	set_inode_flag(F2FS_I(inode), FI_INC_LINK); -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	err = f2fs_add_link(dentry, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (err)  		goto out_fail; @@ -342,7 +334,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,  	struct f2fs_sb_info *sbi = F2FS_SB(sb);  	struct inode *inode;  	int err = 0; -	int ilock;  	if (!new_valid_dev(rdev))  		return -EINVAL; @@ -356,9 +347,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,  	init_special_inode(inode, inode->i_mode, rdev);  	inode->i_op = &f2fs_special_inode_operations; -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	err = f2fs_add_link(dentry, inode); -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	if (err)  		goto out; @@ -387,7 +378,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  	struct f2fs_dir_entry *old_dir_entry = NULL;  	struct f2fs_dir_entry *old_entry;  	struct f2fs_dir_entry *new_entry; -	int err = -ENOENT, ilock = -1; +	int err = -ENOENT;  	f2fs_balance_fs(sbi); @@ -402,7 +393,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  			goto out_old;  	} -	ilock = mutex_lock_op(sbi); +	f2fs_lock_op(sbi);  	if (new_inode) { @@ -428,9 +419,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  		f2fs_set_link(new_dir, new_entry, new_page, old_inode);  		new_inode->i_ctime = CURRENT_TIME; +		down_write(&F2FS_I(new_inode)->i_sem);  		if (old_dir_entry)  			drop_nlink(new_inode);  		drop_nlink(new_inode); +		up_write(&F2FS_I(new_inode)->i_sem); + +		mark_inode_dirty(new_inode);  		if (!new_inode->i_nlink)  			add_orphan_inode(sbi, new_inode->i_ino); @@ -450,6 +445,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  		}  	} +	down_write(&F2FS_I(old_inode)->i_sem); +	file_lost_pino(old_inode); +	up_write(&F2FS_I(old_inode)->i_sem); +  	old_inode->i_ctime = CURRENT_TIME;  	mark_inode_dirty(old_inode); @@ -459,25 +458,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,  		if (old_dir != new_dir) {  			f2fs_set_link(old_inode, old_dir_entry,  						old_dir_page, new_dir); +			update_inode_page(old_inode);  		} else {  			kunmap(old_dir_page);  			f2fs_put_page(old_dir_page, 0);  		}  		drop_nlink(old_dir); +		mark_inode_dirty(old_dir);  		update_inode_page(old_dir);  	} -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  	return 0;  put_out_dir: -	f2fs_put_page(new_page, 1); +	kunmap(new_page); +	f2fs_put_page(new_page, 0);  out_dir:  	if (old_dir_entry) {  		kunmap(old_dir_page);  		f2fs_put_page(old_dir_page, 0);  	} -	mutex_unlock_op(sbi, ilock); +	f2fs_unlock_op(sbi);  out_old:  	kunmap(old_page);  	f2fs_put_page(old_page, 0); @@ -498,6 +500,7 @@ const struct inode_operations f2fs_dir_inode_operations = {  	.getattr	= f2fs_getattr,  	.setattr	= f2fs_setattr,  	.get_acl	= f2fs_get_acl, +	.set_acl	= f2fs_set_acl,  #ifdef CONFIG_F2FS_FS_XATTR  	.setxattr	= generic_setxattr,  	.getxattr	= generic_getxattr, @@ -524,6 +527,7 @@ const struct inode_operations f2fs_special_inode_operations = {  	.getattr	= f2fs_getattr,  	.setattr        = f2fs_setattr,  	.get_acl	= f2fs_get_acl, +	.set_acl	= f2fs_set_acl,  #ifdef CONFIG_F2FS_FS_XATTR  	.setxattr       = generic_setxattr,  	.getxattr       = generic_getxattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 51ef2789443..4b697ccc9b0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -21,9 +21,35 @@  #include "segment.h"  #include <trace/events/f2fs.h> +#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) +  static struct kmem_cache *nat_entry_slab;  static struct kmem_cache *free_nid_slab; +bool available_free_memory(struct f2fs_sb_info *sbi, int type) +{ +	struct f2fs_nm_info *nm_i = NM_I(sbi); +	struct sysinfo val; +	unsigned long mem_size = 0; +	bool res = false; + +	si_meminfo(&val); +	/* give 25%, 25%, 50% memory for each components respectively */ +	if (type == FREE_NIDS) { +		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; +		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); +	} else if (type == NAT_ENTRIES) { +		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; +		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); +	} else if (type == DIRTY_DENTS) { +		if (sbi->sb->s_bdi->dirty_exceeded) +			return false; +		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); +		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); +	} +	return res; +} +  static void clear_node_page_dirty(struct page *page)  {  	struct address_space *mapping = page->mapping; @@ -82,40 +108,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)  	return dst_page;  } -/* - * Readahead NAT pages - */ -static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) -{ -	struct address_space *mapping = sbi->meta_inode->i_mapping; -	struct f2fs_nm_info *nm_i = NM_I(sbi); -	struct blk_plug plug; -	struct page *page; -	pgoff_t index; -	int i; - -	blk_start_plug(&plug); - -	for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { -		if (nid >= nm_i->max_nid) -			nid = 0; -		index = current_nat_addr(sbi, nid); - -		page = grab_cache_page(mapping, index); -		if (!page) -			continue; -		if (PageUptodate(page)) { -			f2fs_put_page(page, 1); -			continue; -		} -		if (f2fs_readpage(sbi, page, index, READ)) -			continue; - -		f2fs_put_page(page, 0); -	} -	blk_finish_plug(&plug); -} -  static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)  {  	return radix_tree_lookup(&nm_i->nat_root, n); @@ -149,6 +141,32 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)  	return is_cp;  } +bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) +{ +	struct f2fs_nm_info *nm_i = NM_I(sbi); +	struct nat_entry *e; +	bool fsync_done = false; + +	read_lock(&nm_i->nat_tree_lock); +	e = __lookup_nat_cache(nm_i, nid); +	if (e) +		fsync_done = e->fsync_done; +	read_unlock(&nm_i->nat_tree_lock); +	return fsync_done; +} + +void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +{ +	struct f2fs_nm_info *nm_i = NM_I(sbi); +	struct nat_entry *e; + +	write_lock(&nm_i->nat_tree_lock); +	e = __lookup_nat_cache(nm_i, nid); +	if (e) +		e->fsync_done = false; +	write_unlock(&nm_i->nat_tree_lock); +} +  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)  {  	struct nat_entry *new; @@ -162,6 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)  	}  	memset(new, 0, sizeof(struct nat_entry));  	nat_set_nid(new, nid); +	new->checkpointed = true;  	list_add_tail(&new->list, &nm_i->nat_entries);  	nm_i->nat_cnt++;  	return new; @@ -180,16 +199,13 @@ retry:  			write_unlock(&nm_i->nat_tree_lock);  			goto retry;  		} -		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); -		nat_set_ino(e, le32_to_cpu(ne->ino)); -		nat_set_version(e, ne->version); -		e->checkpointed = true; +		node_info_from_raw_nat(&e->ni, ne);  	}  	write_unlock(&nm_i->nat_tree_lock);  }  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, -			block_t new_blkaddr) +			block_t new_blkaddr, bool fsync_done)  {  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct nat_entry *e; @@ -203,8 +219,7 @@ retry:  			goto retry;  		}  		e->ni = *ni; -		e->checkpointed = true; -		BUG_ON(ni->blk_addr == NEW_ADDR); +		f2fs_bug_on(ni->blk_addr == NEW_ADDR);  	} else if (new_blkaddr == NEW_ADDR) {  		/*  		 * when nid is reallocated, @@ -212,19 +227,16 @@ retry:  		 * So, reinitialize it with new information.  		 */  		e->ni = *ni; -		BUG_ON(ni->blk_addr != NULL_ADDR); +		f2fs_bug_on(ni->blk_addr != NULL_ADDR);  	} -	if (new_blkaddr == NEW_ADDR) -		e->checkpointed = false; -  	/* sanity check */ -	BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); -	BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && +	f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); +	f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&  			new_blkaddr == NULL_ADDR); -	BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && +	f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&  			new_blkaddr == NEW_ADDR); -	BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && +	f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&  			nat_get_blkaddr(e) != NULL_ADDR &&  			new_blkaddr == NEW_ADDR); @@ -237,14 +249,19 @@ retry:  	/* change address */  	nat_set_blkaddr(e, new_blkaddr);  	__set_nat_cache_dirty(nm_i, e); + +	/* update fsync_mark if its inode nat entry is still alive */ +	e = __lookup_nat_cache(nm_i, ni->ino); +	if (e) +		e->fsync_done = fsync_done;  	write_unlock(&nm_i->nat_tree_lock);  } -static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)  {  	struct f2fs_nm_info *nm_i = NM_I(sbi); -	if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) +	if (available_free_memory(sbi, NAT_ENTRIES))  		return 0;  	write_lock(&nm_i->nat_tree_lock); @@ -391,8 +408,8 @@ got:  /*   * Caller should call f2fs_put_dnode(dn). - * Also, it should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * Also, it should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op() only if ro is not set RDONLY_NODE.   * In the case of RDONLY_NODE, we don't need to care about mutex.   */  int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) @@ -495,15 +512,15 @@ static void truncate_node(struct dnode_of_data *dn)  	get_node_info(sbi, dn->nid, &ni);  	if (dn->inode->i_blocks == 0) { -		BUG_ON(ni.blk_addr != NULL_ADDR); +		f2fs_bug_on(ni.blk_addr != NULL_ADDR);  		goto invalidate;  	} -	BUG_ON(ni.blk_addr == NULL_ADDR); +	f2fs_bug_on(ni.blk_addr == NULL_ADDR);  	/* Deallocate node address */  	invalidate_blocks(sbi, ni.blk_addr); -	dec_valid_node_count(sbi, dn->inode, 1); -	set_node_addr(sbi, &ni, NULL_ADDR); +	dec_valid_node_count(sbi, dn->inode); +	set_node_addr(sbi, &ni, NULL_ADDR, false);  	if (dn->nid == dn->inode->i_ino) {  		remove_orphan_inode(sbi, dn->nid); @@ -516,6 +533,10 @@ invalidate:  	F2FS_SET_SB_DIRT(sbi);  	f2fs_put_page(dn->node_page, 1); + +	invalidate_mapping_pages(NODE_MAPPING(sbi), +			dn->node_page->index, dn->node_page->index); +  	dn->node_page = NULL;  	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);  } @@ -631,19 +652,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,  		return 0;  	/* get indirect nodes in the path */ -	for (i = 0; i < depth - 1; i++) { +	for (i = 0; i < idx + 1; i++) {  		/* refernece count'll be increased */  		pages[i] = get_node_page(sbi, nid[i]);  		if (IS_ERR(pages[i])) { -			depth = i + 1;  			err = PTR_ERR(pages[i]); +			idx = i - 1;  			goto fail;  		}  		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);  	}  	/* free direct nodes linked to a partial indirect node */ -	for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { +	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {  		child_nid = get_nid(pages[idx], i, false);  		if (!child_nid)  			continue; @@ -654,7 +675,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,  		set_nid(pages[idx], i, 0, false);  	} -	if (offset[depth - 1] == 0) { +	if (offset[idx + 1] == 0) {  		dn->node_page = pages[idx];  		dn->nid = nid[idx];  		truncate_node(dn); @@ -662,9 +683,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,  		f2fs_put_page(pages[idx], 1);  	}  	offset[idx]++; -	offset[depth - 1] = 0; +	offset[idx + 1] = 0; +	idx--;  fail: -	for (i = depth - 3; i >= 0; i--) +	for (i = idx; i >= 0; i--)  		f2fs_put_page(pages[i], 1);  	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); @@ -678,11 +700,10 @@ fail:  int truncate_inode_blocks(struct inode *inode, pgoff_t from)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -	struct address_space *node_mapping = sbi->node_inode->i_mapping;  	int err = 0, cont = 1;  	int level, offset[4], noffset[4];  	unsigned int nofs = 0; -	struct f2fs_node *rn; +	struct f2fs_inode *ri;  	struct dnode_of_data dn;  	struct page *page; @@ -699,7 +720,7 @@ restart:  	set_new_dnode(&dn, inode, page, NULL, 0);  	unlock_page(page); -	rn = F2FS_NODE(page); +	ri = F2FS_INODE(page);  	switch (level) {  	case 0:  	case 1: @@ -709,7 +730,7 @@ restart:  		nofs = noffset[1];  		if (!offset[level - 1])  			goto skip_partial; -		err = truncate_partial_nodes(&dn, &rn->i, offset, level); +		err = truncate_partial_nodes(&dn, ri, offset, level);  		if (err < 0 && err != -ENOENT)  			goto fail;  		nofs += 1 + NIDS_PER_BLOCK; @@ -718,7 +739,7 @@ restart:  		nofs = 5 + 2 * NIDS_PER_BLOCK;  		if (!offset[level - 1])  			goto skip_partial; -		err = truncate_partial_nodes(&dn, &rn->i, offset, level); +		err = truncate_partial_nodes(&dn, ri, offset, level);  		if (err < 0 && err != -ENOENT)  			goto fail;  		break; @@ -728,7 +749,7 @@ restart:  skip_partial:  	while (cont) { -		dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); +		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);  		switch (offset[0]) {  		case NODE_DIR1_BLOCK:  		case NODE_DIR2_BLOCK: @@ -751,14 +772,14 @@ skip_partial:  		if (err < 0 && err != -ENOENT)  			goto fail;  		if (offset[1] == 0 && -				rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { +				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {  			lock_page(page); -			if (page->mapping != node_mapping) { +			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {  				f2fs_put_page(page, 1);  				goto restart;  			} -			wait_on_page_writeback(page); -			rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; +			f2fs_wait_on_page_writeback(page, NODE); +			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;  			set_page_dirty(page);  			unlock_page(page);  		} @@ -794,38 +815,34 @@ int truncate_xattr_node(struct inode *inode, struct page *page)  	set_new_dnode(&dn, inode, page, npage, nid);  	if (page) -		dn.inode_page_locked = 1; +		dn.inode_page_locked = true;  	truncate_node(&dn);  	return 0;  }  /* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). + * Caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op().   */ -int remove_inode_page(struct inode *inode) +void remove_inode_page(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct page *page;  	nid_t ino = inode->i_ino;  	struct dnode_of_data dn; -	int err;  	page = get_node_page(sbi, ino);  	if (IS_ERR(page)) -		return PTR_ERR(page); +		return; -	err = truncate_xattr_node(inode, page); -	if (err) { +	if (truncate_xattr_node(inode, page)) {  		f2fs_put_page(page, 1); -		return err; +		return;  	} -  	/* 0 is possible, after f2fs_new_inode() is failed */ -	BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); +	f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);  	set_new_dnode(&dn, inode, page, page, ino);  	truncate_node(&dn); -	return 0;  }  struct page *new_inode_page(struct inode *inode, const struct qstr *name) @@ -843,19 +860,18 @@ struct page *new_node_page(struct dnode_of_data *dn,  				unsigned int ofs, struct page *ipage)  {  	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); -	struct address_space *mapping = sbi->node_inode->i_mapping;  	struct node_info old_ni, new_ni;  	struct page *page;  	int err; -	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) +	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))  		return ERR_PTR(-EPERM); -	page = grab_cache_page(mapping, dn->nid); +	page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);  	if (!page)  		return ERR_PTR(-ENOMEM); -	if (!inc_valid_node_count(sbi, dn->inode, 1)) { +	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {  		err = -ENOSPC;  		goto fail;  	} @@ -863,17 +879,18 @@ struct page *new_node_page(struct dnode_of_data *dn,  	get_node_info(sbi, dn->nid, &old_ni);  	/* Reinitialize old_ni with new node page */ -	BUG_ON(old_ni.blk_addr != NULL_ADDR); +	f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);  	new_ni = old_ni;  	new_ni.ino = dn->inode->i_ino; -	set_node_addr(sbi, &new_ni, NEW_ADDR); +	set_node_addr(sbi, &new_ni, NEW_ADDR, false); +	f2fs_wait_on_page_writeback(page, NODE);  	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);  	set_cold_node(dn->inode, page);  	SetPageUptodate(page);  	set_page_dirty(page); -	if (ofs == XATTR_NODE_OFFSET) +	if (f2fs_has_xattr_block(ofs))  		F2FS_I(dn->inode)->i_xattr_nid = dn->nid;  	dn->node_page = page; @@ -898,14 +915,14 @@ fail:   * LOCKED_PAGE: f2fs_put_page(page, 1)   * error: nothing   */ -static int read_node_page(struct page *page, int type) +static int read_node_page(struct page *page, int rw)  {  	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);  	struct node_info ni;  	get_node_info(sbi, page->index, &ni); -	if (ni.blk_addr == NULL_ADDR) { +	if (unlikely(ni.blk_addr == NULL_ADDR)) {  		f2fs_put_page(page, 1);  		return -ENOENT;  	} @@ -913,7 +930,7 @@ static int read_node_page(struct page *page, int type)  	if (PageUptodate(page))  		return LOCKED_PAGE; -	return f2fs_readpage(sbi, page, ni.blk_addr, type); +	return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);  }  /* @@ -921,18 +938,17 @@ static int read_node_page(struct page *page, int type)   */  void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)  { -	struct address_space *mapping = sbi->node_inode->i_mapping;  	struct page *apage;  	int err; -	apage = find_get_page(mapping, nid); +	apage = find_get_page(NODE_MAPPING(sbi), nid);  	if (apage && PageUptodate(apage)) {  		f2fs_put_page(apage, 0);  		return;  	}  	f2fs_put_page(apage, 0); -	apage = grab_cache_page(mapping, nid); +	apage = grab_cache_page(NODE_MAPPING(sbi), nid);  	if (!apage)  		return; @@ -945,11 +961,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)  struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)  { -	struct address_space *mapping = sbi->node_inode->i_mapping;  	struct page *page;  	int err;  repeat: -	page = grab_cache_page(mapping, nid); +	page = grab_cache_page(NODE_MAPPING(sbi), nid);  	if (!page)  		return ERR_PTR(-ENOMEM); @@ -960,17 +975,15 @@ repeat:  		goto got_it;  	lock_page(page); -	if (!PageUptodate(page)) { +	if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {  		f2fs_put_page(page, 1);  		return ERR_PTR(-EIO);  	} -	if (page->mapping != mapping) { +	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {  		f2fs_put_page(page, 1);  		goto repeat;  	}  got_it: -	BUG_ON(nid != nid_of_node(page)); -	mark_page_accessed(page);  	return page;  } @@ -981,7 +994,6 @@ got_it:  struct page *get_node_page_ra(struct page *parent, int start)  {  	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); -	struct address_space *mapping = sbi->node_inode->i_mapping;  	struct blk_plug plug;  	struct page *page;  	int err, i, end; @@ -992,7 +1004,7 @@ struct page *get_node_page_ra(struct page *parent, int start)  	if (!nid)  		return ERR_PTR(-ENOENT);  repeat: -	page = grab_cache_page(mapping, nid); +	page = grab_cache_page(NODE_MAPPING(sbi), nid);  	if (!page)  		return ERR_PTR(-ENOMEM); @@ -1017,16 +1029,15 @@ repeat:  	blk_finish_plug(&plug);  	lock_page(page); -	if (page->mapping != mapping) { +	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {  		f2fs_put_page(page, 1);  		goto repeat;  	}  page_hit: -	if (!PageUptodate(page)) { +	if (unlikely(!PageUptodate(page))) {  		f2fs_put_page(page, 1);  		return ERR_PTR(-EIO);  	} -	mark_page_accessed(page);  	return page;  } @@ -1048,7 +1059,6 @@ void sync_inode_page(struct dnode_of_data *dn)  int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,  					struct writeback_control *wbc)  { -	struct address_space *mapping = sbi->node_inode->i_mapping;  	pgoff_t index, end;  	struct pagevec pvec;  	int step = ino ? 2 : 0; @@ -1062,7 +1072,7 @@ next_step:  	while (index <= end) {  		int i, nr_pages; -		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, +		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,  				PAGECACHE_TAG_DIRTY,  				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);  		if (nr_pages == 0) @@ -1095,7 +1105,7 @@ next_step:  			else if (!trylock_page(page))  				continue; -			if (unlikely(page->mapping != mapping)) { +			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {  continue_unlock:  				unlock_page(page);  				continue; @@ -1122,7 +1132,7 @@ continue_unlock:  				set_fsync_mark(page, 0);  				set_dentry_mark(page, 0);  			} -			mapping->a_ops->writepage(page, wbc); +			NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);  			wrote++;  			if (--wbc->nr_to_write == 0) @@ -1143,11 +1153,52 @@ continue_unlock:  	}  	if (wrote) -		f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); - +		f2fs_submit_merged_bio(sbi, NODE, WRITE);  	return nwritten;  } +int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) +{ +	pgoff_t index = 0, end = LONG_MAX; +	struct pagevec pvec; +	int ret2 = 0, ret = 0; + +	pagevec_init(&pvec, 0); + +	while (index <= end) { +		int i, nr_pages; +		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, +				PAGECACHE_TAG_WRITEBACK, +				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); +		if (nr_pages == 0) +			break; + +		for (i = 0; i < nr_pages; i++) { +			struct page *page = pvec.pages[i]; + +			/* until radix tree lookup accepts end_index */ +			if (unlikely(page->index > end)) +				continue; + +			if (ino && ino_of_node(page) == ino) { +				f2fs_wait_on_page_writeback(page, NODE); +				if (TestClearPageError(page)) +					ret = -EIO; +			} +		} +		pagevec_release(&pvec); +		cond_resched(); +	} + +	if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags))) +		ret2 = -ENOSPC; +	if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags))) +		ret2 = -EIO; +	if (!ret) +		ret = ret2; +	return ret; +} +  static int f2fs_write_node_page(struct page *page,  				struct writeback_control *wbc)  { @@ -1155,66 +1206,71 @@ static int f2fs_write_node_page(struct page *page,  	nid_t nid;  	block_t new_addr;  	struct node_info ni; +	struct f2fs_io_info fio = { +		.type = NODE, +		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, +	}; + +	trace_f2fs_writepage(page, NODE); + +	if (unlikely(sbi->por_doing)) +		goto redirty_out; -	wait_on_page_writeback(page); +	f2fs_wait_on_page_writeback(page, NODE);  	/* get old block addr of this node page */  	nid = nid_of_node(page); -	BUG_ON(page->index != nid); +	f2fs_bug_on(page->index != nid);  	get_node_info(sbi, nid, &ni);  	/* This page is already truncated */ -	if (ni.blk_addr == NULL_ADDR) { +	if (unlikely(ni.blk_addr == NULL_ADDR)) {  		dec_page_count(sbi, F2FS_DIRTY_NODES);  		unlock_page(page);  		return 0;  	} -	if (wbc->for_reclaim) { -		dec_page_count(sbi, F2FS_DIRTY_NODES); -		wbc->pages_skipped++; -		set_page_dirty(page); -		return AOP_WRITEPAGE_ACTIVATE; -	} +	if (wbc->for_reclaim) +		goto redirty_out;  	mutex_lock(&sbi->node_write);  	set_page_writeback(page); -	write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); -	set_node_addr(sbi, &ni, new_addr); +	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); +	set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));  	dec_page_count(sbi, F2FS_DIRTY_NODES);  	mutex_unlock(&sbi->node_write);  	unlock_page(page);  	return 0; + +redirty_out: +	redirty_page_for_writepage(wbc, page); +	return AOP_WRITEPAGE_ACTIVATE;  } -/* - * It is very important to gather dirty pages and write at once, so that we can - * submit a big bio without interfering other data writes. - * Be default, 512 pages (2MB) * 3 node types, is more reasonable. - */ -#define COLLECT_DIRTY_NODES	1536  static int f2fs_write_node_pages(struct address_space *mapping,  			    struct writeback_control *wbc)  {  	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); -	long nr_to_write = wbc->nr_to_write; +	long diff; -	/* First check balancing cached NAT entries */ -	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { -		f2fs_sync_fs(sbi->sb, true); -		return 0; -	} +	trace_f2fs_writepages(mapping->host, wbc, NODE); + +	/* balancing f2fs's metadata in background */ +	f2fs_balance_fs_bg(sbi);  	/* collect a number of dirty node pages and write together */ -	if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) -		return 0; +	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) +		goto skip_write; -	/* if mounting is failed, skip writing node pages */ -	wbc->nr_to_write = 3 * max_hw_blocks(sbi); +	diff = nr_pages_to_write(sbi, NODE, wbc); +	wbc->sync_mode = WB_SYNC_NONE;  	sync_node_pages(sbi, 0, wbc); -	wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - -						wbc->nr_to_write); +	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); +	return 0; + +skip_write: +	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);  	return 0;  } @@ -1223,6 +1279,8 @@ static int f2fs_set_node_page_dirty(struct page *page)  	struct address_space *mapping = page->mapping;  	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); +	trace_f2fs_set_page_dirty(page, NODE); +  	SetPageUptodate(page);  	if (!PageDirty(page)) {  		__set_page_dirty_nobuffers(page); @@ -1260,59 +1318,51 @@ const struct address_space_operations f2fs_node_aops = {  	.releasepage	= f2fs_release_node_page,  }; -static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) +static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, +						nid_t n)  { -	struct list_head *this; -	struct free_nid *i; -	list_for_each(this, head) { -		i = list_entry(this, struct free_nid, list); -		if (i->nid == n) -			return i; -	} -	return NULL; +	return radix_tree_lookup(&nm_i->free_nid_root, n);  } -static void __del_from_free_nid_list(struct free_nid *i) +static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, +						struct free_nid *i)  {  	list_del(&i->list); -	kmem_cache_free(free_nid_slab, i); +	radix_tree_delete(&nm_i->free_nid_root, i->nid);  } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)  { +	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct free_nid *i;  	struct nat_entry *ne;  	bool allocated = false; -	if (nm_i->fcnt > 2 * MAX_FREE_NIDS) +	if (!available_free_memory(sbi, FREE_NIDS))  		return -1;  	/* 0 nid should not be used */ -	if (nid == 0) +	if (unlikely(nid == 0))  		return 0; -	if (!build) -		goto retry; - -	/* do not add allocated nids */ -	read_lock(&nm_i->nat_tree_lock); -	ne = __lookup_nat_cache(nm_i, nid); -	if (ne && nat_get_blkaddr(ne) != NULL_ADDR) -		allocated = true; -	read_unlock(&nm_i->nat_tree_lock); -	if (allocated) -		return 0; -retry: -	i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); -	if (!i) { -		cond_resched(); -		goto retry; +	if (build) { +		/* do not add allocated nids */ +		read_lock(&nm_i->nat_tree_lock); +		ne = __lookup_nat_cache(nm_i, nid); +		if (ne && +			(!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) +			allocated = true; +		read_unlock(&nm_i->nat_tree_lock); +		if (allocated) +			return 0;  	} + +	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);  	i->nid = nid;  	i->state = NID_NEW;  	spin_lock(&nm_i->free_nid_list_lock); -	if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { +	if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {  		spin_unlock(&nm_i->free_nid_list_lock);  		kmem_cache_free(free_nid_slab, i);  		return 0; @@ -1326,18 +1376,25 @@ retry:  static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)  {  	struct free_nid *i; +	bool need_free = false; +  	spin_lock(&nm_i->free_nid_list_lock); -	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); +	i = __lookup_free_nid_list(nm_i, nid);  	if (i && i->state == NID_NEW) { -		__del_from_free_nid_list(i); +		__del_from_free_nid_list(nm_i, i);  		nm_i->fcnt--; +		need_free = true;  	}  	spin_unlock(&nm_i->free_nid_list_lock); + +	if (need_free) +		kmem_cache_free(free_nid_slab, i);  } -static void scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_sb_info *sbi,  			struct page *nat_page, nid_t start_nid)  { +	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct f2fs_nat_block *nat_blk = page_address(nat_page);  	block_t blk_addr;  	int i; @@ -1346,13 +1403,13 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { -		if (start_nid >= nm_i->max_nid) +		if (unlikely(start_nid >= nm_i->max_nid))  			break;  		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); -		BUG_ON(blk_addr == NEW_ADDR); +		f2fs_bug_on(blk_addr == NEW_ADDR);  		if (blk_addr == NULL_ADDR) { -			if (add_free_nid(nm_i, start_nid, true) < 0) +			if (add_free_nid(sbi, start_nid, true) < 0)  				break;  		}  	} @@ -1371,16 +1428,16 @@ static void build_free_nids(struct f2fs_sb_info *sbi)  		return;  	/* readahead nat pages to be scanned */ -	ra_nat_pages(sbi, nid); +	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);  	while (1) {  		struct page *page = get_current_nat_page(sbi, nid); -		scan_nat_page(nm_i, page, nid); +		scan_nat_page(sbi, page, nid);  		f2fs_put_page(page, 1);  		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); -		if (nid >= nm_i->max_nid) +		if (unlikely(nid >= nm_i->max_nid))  			nid = 0;  		if (i++ == FREE_NID_PAGES) @@ -1396,7 +1453,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)  		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);  		nid = le32_to_cpu(nid_in_journal(sum, i));  		if (addr == NULL_ADDR) -			add_free_nid(nm_i, nid, true); +			add_free_nid(sbi, nid, true);  		else  			remove_free_nid(nm_i, nid);  	} @@ -1412,23 +1469,20 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)  {  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct free_nid *i = NULL; -	struct list_head *this;  retry: -	if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) +	if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))  		return false;  	spin_lock(&nm_i->free_nid_list_lock);  	/* We should not use stale free nids created by build_free_nids */ -	if (nm_i->fcnt && !sbi->on_build_free_nids) { -		BUG_ON(list_empty(&nm_i->free_nid_list)); -		list_for_each(this, &nm_i->free_nid_list) { -			i = list_entry(this, struct free_nid, list); +	if (nm_i->fcnt && !on_build_free_nids(nm_i)) { +		f2fs_bug_on(list_empty(&nm_i->free_nid_list)); +		list_for_each_entry(i, &nm_i->free_nid_list, list)  			if (i->state == NID_NEW)  				break; -		} -		BUG_ON(i->state != NID_NEW); +		f2fs_bug_on(i->state != NID_NEW);  		*nid = i->nid;  		i->state = NID_ALLOC;  		nm_i->fcnt--; @@ -1439,9 +1493,7 @@ retry:  	/* Let's scan nat pages and its caches to get free nids */  	mutex_lock(&nm_i->build_lock); -	sbi->on_build_free_nids = 1;  	build_free_nids(sbi); -	sbi->on_build_free_nids = 0;  	mutex_unlock(&nm_i->build_lock);  	goto retry;  } @@ -1455,10 +1507,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)  	struct free_nid *i;  	spin_lock(&nm_i->free_nid_list_lock); -	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); -	BUG_ON(!i || i->state != NID_ALLOC); -	__del_from_free_nid_list(i); +	i = __lookup_free_nid_list(nm_i, nid); +	f2fs_bug_on(!i || i->state != NID_ALLOC); +	__del_from_free_nid_list(nm_i, i);  	spin_unlock(&nm_i->free_nid_list_lock); + +	kmem_cache_free(free_nid_slab, i);  }  /* @@ -1468,20 +1522,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)  {  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct free_nid *i; +	bool need_free = false;  	if (!nid)  		return;  	spin_lock(&nm_i->free_nid_list_lock); -	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); -	BUG_ON(!i || i->state != NID_ALLOC); -	if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { -		__del_from_free_nid_list(i); +	i = __lookup_free_nid_list(nm_i, nid); +	f2fs_bug_on(!i || i->state != NID_ALLOC); +	if (!available_free_memory(sbi, FREE_NIDS)) { +		__del_from_free_nid_list(nm_i, i); +		need_free = true;  	} else {  		i->state = NID_NEW;  		nm_i->fcnt++;  	}  	spin_unlock(&nm_i->free_nid_list_lock); + +	if (need_free) +		kmem_cache_free(free_nid_slab, i);  }  void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, @@ -1489,90 +1548,200 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,  		block_t new_blkaddr)  {  	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); -	set_node_addr(sbi, ni, new_blkaddr); +	set_node_addr(sbi, ni, new_blkaddr, false);  	clear_node_page_dirty(page);  } +static void recover_inline_xattr(struct inode *inode, struct page *page) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	void *src_addr, *dst_addr; +	size_t inline_size; +	struct page *ipage; +	struct f2fs_inode *ri; + +	if (!f2fs_has_inline_xattr(inode)) +		return; + +	if (!IS_INODE(page)) +		return; + +	ri = F2FS_INODE(page); +	if (!(ri->i_inline & F2FS_INLINE_XATTR)) +		return; + +	ipage = get_node_page(sbi, inode->i_ino); +	f2fs_bug_on(IS_ERR(ipage)); + +	dst_addr = inline_xattr_addr(ipage); +	src_addr = inline_xattr_addr(page); +	inline_size = inline_xattr_size(inode); + +	f2fs_wait_on_page_writeback(ipage, NODE); +	memcpy(dst_addr, src_addr, inline_size); + +	update_inode(inode, ipage); +	f2fs_put_page(ipage, 1); +} + +bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; +	nid_t new_xnid = nid_of_node(page); +	struct node_info ni; + +	recover_inline_xattr(inode, page); + +	if (!f2fs_has_xattr_block(ofs_of_node(page))) +		return false; + +	/* 1: invalidate the previous xattr nid */ +	if (!prev_xnid) +		goto recover_xnid; + +	/* Deallocate node address */ +	get_node_info(sbi, prev_xnid, &ni); +	f2fs_bug_on(ni.blk_addr == NULL_ADDR); +	invalidate_blocks(sbi, ni.blk_addr); +	dec_valid_node_count(sbi, inode); +	set_node_addr(sbi, &ni, NULL_ADDR, false); + +recover_xnid: +	/* 2: allocate new xattr nid */ +	if (unlikely(!inc_valid_node_count(sbi, inode))) +		f2fs_bug_on(1); + +	remove_free_nid(NM_I(sbi), new_xnid); +	get_node_info(sbi, new_xnid, &ni); +	ni.ino = inode->i_ino; +	set_node_addr(sbi, &ni, NEW_ADDR, false); +	F2FS_I(inode)->i_xattr_nid = new_xnid; + +	/* 3: update xattr blkaddr */ +	refresh_sit_entry(sbi, NEW_ADDR, blkaddr); +	set_node_addr(sbi, &ni, blkaddr, false); + +	update_inode_page(inode); +	return true; +} +  int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)  { -	struct address_space *mapping = sbi->node_inode->i_mapping; -	struct f2fs_node *src, *dst; +	struct f2fs_inode *src, *dst;  	nid_t ino = ino_of_node(page);  	struct node_info old_ni, new_ni;  	struct page *ipage; -	ipage = grab_cache_page(mapping, ino); +	get_node_info(sbi, ino, &old_ni); + +	if (unlikely(old_ni.blk_addr != NULL_ADDR)) +		return -EINVAL; + +	ipage = grab_cache_page(NODE_MAPPING(sbi), ino);  	if (!ipage)  		return -ENOMEM;  	/* Should not use this inode  from free nid list */  	remove_free_nid(NM_I(sbi), ino); -	get_node_info(sbi, ino, &old_ni);  	SetPageUptodate(ipage);  	fill_node_footer(ipage, ino, ino, 0, true); -	src = F2FS_NODE(page); -	dst = F2FS_NODE(ipage); +	src = F2FS_INODE(page); +	dst = F2FS_INODE(ipage); -	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); -	dst->i.i_size = 0; -	dst->i.i_blocks = cpu_to_le64(1); -	dst->i.i_links = cpu_to_le32(1); -	dst->i.i_xattr_nid = 0; +	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); +	dst->i_size = 0; +	dst->i_blocks = cpu_to_le64(1); +	dst->i_links = cpu_to_le32(1); +	dst->i_xattr_nid = 0;  	new_ni = old_ni;  	new_ni.ino = ino; -	if (!inc_valid_node_count(sbi, NULL, 1)) +	if (unlikely(!inc_valid_node_count(sbi, NULL)))  		WARN_ON(1); -	set_node_addr(sbi, &new_ni, NEW_ADDR); +	set_node_addr(sbi, &new_ni, NEW_ADDR, false);  	inc_valid_inode_count(sbi);  	f2fs_put_page(ipage, 1);  	return 0;  } +/* + * ra_sum_pages() merge contiguous pages into one bio and submit. + * these pre-readed pages are alloced in bd_inode's mapping tree. + */ +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, +				int start, int nrpages) +{ +	struct inode *inode = sbi->sb->s_bdev->bd_inode; +	struct address_space *mapping = inode->i_mapping; +	int i, page_idx = start; +	struct f2fs_io_info fio = { +		.type = META, +		.rw = READ_SYNC | REQ_META | REQ_PRIO +	}; + +	for (i = 0; page_idx < start + nrpages; page_idx++, i++) { +		/* alloc page in bd_inode for reading node summary info */ +		pages[i] = grab_cache_page(mapping, page_idx); +		if (!pages[i]) +			break; +		f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); +	} + +	f2fs_submit_merged_bio(sbi, META, READ); +	return i; +} +  int restore_node_summary(struct f2fs_sb_info *sbi,  			unsigned int segno, struct f2fs_summary_block *sum)  {  	struct f2fs_node *rn;  	struct f2fs_summary *sum_entry; -	struct page *page; +	struct inode *inode = sbi->sb->s_bdev->bd_inode;  	block_t addr; -	int i, last_offset; - -	/* alloc temporal page for read node */ -	page = alloc_page(GFP_NOFS | __GFP_ZERO); -	if (!page) -		return -ENOMEM; -	lock_page(page); +	int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); +	struct page *pages[bio_blocks]; +	int i, idx, last_offset, nrpages, err = 0;  	/* scan the node segment */  	last_offset = sbi->blocks_per_seg;  	addr = START_BLOCK(sbi, segno);  	sum_entry = &sum->entries[0]; -	for (i = 0; i < last_offset; i++, sum_entry++) { -		/* -		 * In order to read next node page, -		 * we must clear PageUptodate flag. -		 */ -		ClearPageUptodate(page); +	for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { +		nrpages = min(last_offset - i, bio_blocks); -		if (f2fs_readpage(sbi, page, addr, READ_SYNC)) -			goto out; +		/* read ahead node pages */ +		nrpages = ra_sum_pages(sbi, pages, addr, nrpages); +		if (!nrpages) +			return -ENOMEM; + +		for (idx = 0; idx < nrpages; idx++) { +			if (err) +				goto skip; + +			lock_page(pages[idx]); +			if (unlikely(!PageUptodate(pages[idx]))) { +				err = -EIO; +			} else { +				rn = F2FS_NODE(pages[idx]); +				sum_entry->nid = rn->footer.nid; +				sum_entry->version = 0; +				sum_entry->ofs_in_node = 0; +				sum_entry++; +			} +			unlock_page(pages[idx]); +skip: +			page_cache_release(pages[idx]); +		} -		lock_page(page); -		rn = F2FS_NODE(page); -		sum_entry->nid = rn->footer.nid; -		sum_entry->version = 0; -		sum_entry->ofs_in_node = 0; -		addr++; +		invalidate_mapping_pages(inode->i_mapping, addr, +							addr + nrpages);  	} -	unlock_page(page); -out: -	__free_pages(page, 0); -	return 0; +	return err;  }  static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) @@ -1608,9 +1777,7 @@ retry:  			write_unlock(&nm_i->nat_tree_lock);  			goto retry;  		} -		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); -		nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); -		nat_set_version(ne, raw_ne.version); +		node_info_from_raw_nat(&ne->ni, &raw_ne);  		__set_nat_cache_dirty(nm_i, ne);  		write_unlock(&nm_i->nat_tree_lock);  	} @@ -1627,7 +1794,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)  	struct f2fs_nm_info *nm_i = NM_I(sbi);  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);  	struct f2fs_summary_block *sum = curseg->sum_blk; -	struct list_head *cur, *n; +	struct nat_entry *ne, *cur;  	struct page *page = NULL;  	struct f2fs_nat_block *nat_blk = NULL;  	nid_t start_nid = 0, end_nid = 0; @@ -1639,18 +1806,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)  		mutex_lock(&curseg->curseg_mutex);  	/* 1) flush dirty nat caches */ -	list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { -		struct nat_entry *ne; +	list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {  		nid_t nid;  		struct f2fs_nat_entry raw_ne;  		int offset = -1; -		block_t new_blkaddr; - -		ne = list_entry(cur, struct nat_entry, list); -		nid = nat_get_nid(ne);  		if (nat_get_blkaddr(ne) == NEW_ADDR)  			continue; + +		nid = nat_get_nid(ne); +  		if (flushed)  			goto to_nat_page; @@ -1677,14 +1842,10 @@ to_nat_page:  			nat_blk = page_address(page);  		} -		BUG_ON(!nat_blk); +		f2fs_bug_on(!nat_blk);  		raw_ne = nat_blk->entries[nid - start_nid];  flush_now: -		new_blkaddr = nat_get_blkaddr(ne); - -		raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); -		raw_ne.block_addr = cpu_to_le32(new_blkaddr); -		raw_ne.version = nat_get_version(ne); +		raw_nat_from_node_info(&raw_ne, &ne->ni);  		if (offset < 0) {  			nat_blk->entries[nid - start_nid] = raw_ne; @@ -1694,23 +1855,19 @@ flush_now:  		}  		if (nat_get_blkaddr(ne) == NULL_ADDR && -				add_free_nid(NM_I(sbi), nid, false) <= 0) { +				add_free_nid(sbi, nid, false) <= 0) {  			write_lock(&nm_i->nat_tree_lock);  			__del_from_nat_cache(nm_i, ne);  			write_unlock(&nm_i->nat_tree_lock);  		} else {  			write_lock(&nm_i->nat_tree_lock);  			__clear_nat_cache_dirty(nm_i, ne); -			ne->checkpointed = true;  			write_unlock(&nm_i->nat_tree_lock);  		}  	}  	if (!flushed)  		mutex_unlock(&curseg->curseg_mutex);  	f2fs_put_page(page, 1); - -	/* 2) shrink nat caches if necessary */ -	try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);  }  static int init_node_manager(struct f2fs_sb_info *sbi) @@ -1725,10 +1882,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi)  	/* segment_count_nat includes pair segment so divide to 2. */  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;  	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); +  	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + +	/* not used nids: 0, node, meta, (and root counted as valid node) */ +	nm_i->available_nids = nm_i->max_nid - 3;  	nm_i->fcnt = 0;  	nm_i->nat_cnt = 0; +	nm_i->ram_thresh = DEF_RAM_THRESHOLD; +	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);  	INIT_LIST_HEAD(&nm_i->free_nid_list);  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);  	INIT_LIST_HEAD(&nm_i->nat_entries); @@ -1781,11 +1944,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)  	/* destroy free nid list */  	spin_lock(&nm_i->free_nid_list_lock);  	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { -		BUG_ON(i->state == NID_ALLOC); -		__del_from_free_nid_list(i); +		f2fs_bug_on(i->state == NID_ALLOC); +		__del_from_free_nid_list(nm_i, i);  		nm_i->fcnt--; +		spin_unlock(&nm_i->free_nid_list_lock); +		kmem_cache_free(free_nid_slab, i); +		spin_lock(&nm_i->free_nid_list_lock);  	} -	BUG_ON(nm_i->fcnt); +	f2fs_bug_on(nm_i->fcnt);  	spin_unlock(&nm_i->free_nid_list_lock);  	/* destroy nat cache */ @@ -1793,13 +1959,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)  	while ((found = __gang_lookup_nat_cache(nm_i,  					nid, NATVEC_SIZE, natvec))) {  		unsigned idx; -		for (idx = 0; idx < found; idx++) { -			struct nat_entry *e = natvec[idx]; -			nid = nat_get_nid(e) + 1; -			__del_from_nat_cache(nm_i, e); -		} +		nid = nat_get_nid(natvec[found - 1]) + 1; +		for (idx = 0; idx < found; idx++) +			__del_from_nat_cache(nm_i, natvec[idx]);  	} -	BUG_ON(nm_i->nat_cnt); +	f2fs_bug_on(nm_i->nat_cnt);  	write_unlock(&nm_i->nat_tree_lock);  	kfree(nm_i->nat_bitmap); @@ -1810,12 +1974,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)  int __init create_node_manager_caches(void)  {  	nat_entry_slab = f2fs_kmem_cache_create("nat_entry", -			sizeof(struct nat_entry), NULL); +			sizeof(struct nat_entry));  	if (!nat_entry_slab)  		return -ENOMEM;  	free_nid_slab = f2fs_kmem_cache_create("free_nid", -			sizeof(struct free_nid), NULL); +			sizeof(struct free_nid));  	if (!free_nid_slab) {  		kmem_cache_destroy(nat_entry_slab);  		return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3496bb3e15d..7281112cd1c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -17,14 +17,11 @@  /* # of pages to perform readahead before building free nids */  #define FREE_NID_PAGES 4 -/* maximum # of free node ids to produce during build_free_nids */ -#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) -  /* maximum readahead size for node during getting data blocks */  #define MAX_RA_NODE		128 -/* maximum cached nat entries to manage memory footprint */ -#define NM_WOUT_THRESHOLD	(64 * NAT_ENTRY_PER_BLOCK) +/* control the memory footprint threshold (10MB per 1GB ram) */ +#define DEF_RAM_THRESHOLD	10  /* vector size for gang look-up from nat cache that consists of radix tree */  #define NATVEC_SIZE	64 @@ -45,6 +42,7 @@ struct node_info {  struct nat_entry {  	struct list_head list;	/* for clean or dirty nat list */  	bool checkpointed;	/* whether it is checkpointed or not */ +	bool fsync_done;	/* whether the latest node has fsync mark */  	struct node_info ni;	/* in-memory node information */  }; @@ -58,9 +56,15 @@ struct nat_entry {  #define nat_set_version(nat, v)		(nat->ni.version = v)  #define __set_nat_cache_dirty(nm_i, ne)					\ -	list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +	do {								\ +		ne->checkpointed = false;				\ +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\ +	} while (0)  #define __clear_nat_cache_dirty(nm_i, ne)				\ -	list_move_tail(&ne->list, &nm_i->nat_entries); +	do {								\ +		ne->checkpointed = true;				\ +		list_move_tail(&ne->list, &nm_i->nat_entries);		\ +	} while (0)  #define inc_node_version(version)	(++version)  static inline void node_info_from_raw_nat(struct node_info *ni, @@ -71,6 +75,20 @@ static inline void node_info_from_raw_nat(struct node_info *ni,  	ni->version = raw_ne->version;  } +static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, +						struct node_info *ni) +{ +	raw_ne->ino = cpu_to_le32(ni->ino); +	raw_ne->block_addr = cpu_to_le32(ni->blk_addr); +	raw_ne->version = ni->version; +} + +enum mem_type { +	FREE_NIDS,	/* indicates the free nid list */ +	NAT_ENTRIES,	/* indicates the cached nat entry */ +	DIRTY_DENTS	/* indicates dirty dentry pages */ +}; +  /*   * For free nid mangement   */ @@ -224,13 +242,19 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)   *    |            `- direct node (5 + N => 5 + 2N - 1)   *    `- double indirect node (5 + 2N)   *                 `- indirect node (6 + 2N) - *                       `- direct node (x(N + 1)) + *                       `- direct node + *                 ...... + *                 `- indirect node ((6 + 2N) + x(N + 1)) + *                       `- direct node + *                 ...... + *                 `- indirect node ((6 + 2N) + (N - 1)(N + 1)) + *                       `- direct node   */  static inline bool IS_DNODE(struct page *node_page)  {  	unsigned int ofs = ofs_of_node(node_page); -	if (ofs == XATTR_NODE_OFFSET) +	if (f2fs_has_xattr_block(ofs))  		return false;  	if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || @@ -248,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)  {  	struct f2fs_node *rn = F2FS_NODE(p); -	wait_on_page_writeback(p); +	f2fs_wait_on_page_writeback(p, NODE);  	if (i)  		rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 51ef5eec33d..a112368a4a8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -27,21 +27,18 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)  static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,  								nid_t ino)  { -	struct list_head *this;  	struct fsync_inode_entry *entry; -	list_for_each(this, head) { -		entry = list_entry(this, struct fsync_inode_entry, list); +	list_for_each_entry(entry, head, list)  		if (entry->inode->i_ino == ino)  			return entry; -	} +  	return NULL;  }  static int recover_dentry(struct page *ipage, struct inode *inode)  { -	struct f2fs_node *raw_node = F2FS_NODE(ipage); -	struct f2fs_inode *raw_inode = &(raw_node->i); +	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);  	nid_t pino = le32_to_cpu(raw_inode->i_pino);  	struct f2fs_dir_entry *de;  	struct qstr name; @@ -49,51 +46,71 @@ static int recover_dentry(struct page *ipage, struct inode *inode)  	struct inode *dir, *einode;  	int err = 0; -	dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); -	if (!dir) { -		dir = f2fs_iget(inode->i_sb, pino); -		if (IS_ERR(dir)) { -			err = PTR_ERR(dir); -			goto out; -		} -		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); -		add_dirty_dir_inode(dir); +	dir = f2fs_iget(inode->i_sb, pino); +	if (IS_ERR(dir)) { +		err = PTR_ERR(dir); +		goto out;  	}  	name.len = le32_to_cpu(raw_inode->i_namelen);  	name.name = raw_inode->i_name; + +	if (unlikely(name.len > F2FS_NAME_LEN)) { +		WARN_ON(1); +		err = -ENAMETOOLONG; +		goto out_err; +	}  retry:  	de = f2fs_find_entry(dir, &name, &page); -	if (de && inode->i_ino == le32_to_cpu(de->ino)) { -		kunmap(page); -		f2fs_put_page(page, 0); -		goto out; -	} +	if (de && inode->i_ino == le32_to_cpu(de->ino)) +		goto out_unmap_put;  	if (de) {  		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));  		if (IS_ERR(einode)) {  			WARN_ON(1); -			if (PTR_ERR(einode) == -ENOENT) +			err = PTR_ERR(einode); +			if (err == -ENOENT)  				err = -EEXIST; -			goto out; +			goto out_unmap_put; +		} +		err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); +		if (err) { +			iput(einode); +			goto out_unmap_put;  		}  		f2fs_delete_entry(de, page, einode);  		iput(einode);  		goto retry;  	}  	err = __f2fs_add_link(dir, &name, inode); +	if (err) +		goto out_err; + +	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { +		iput(dir); +	} else { +		add_dirty_dir_inode(dir); +		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); +	} + +	goto out; + +out_unmap_put: +	kunmap(page); +	f2fs_put_page(page, 0); +out_err: +	iput(dir);  out: -	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " -			"ino = %x, name = %s, dir = %lx, err = %d", -			ino_of_node(ipage), raw_inode->i_name, +	f2fs_msg(inode->i_sb, KERN_NOTICE, +			"%s: ino = %x, name = %s, dir = %lx, err = %d", +			__func__, ino_of_node(ipage), raw_inode->i_name,  			IS_ERR(dir) ? 0 : dir->i_ino, err);  	return err;  }  static int recover_inode(struct inode *inode, struct page *node_page)  { -	struct f2fs_node *raw_node = F2FS_NODE(node_page); -	struct f2fs_inode *raw_inode = &(raw_node->i); +	struct f2fs_inode *raw_inode = F2FS_INODE(node_page);  	if (!IS_INODE(node_page))  		return 0; @@ -125,7 +142,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)  	/* get node pages in the current segment */  	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); -	blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; +	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);  	/* read node page */  	page = alloc_page(GFP_F2FS_ZERO); @@ -136,9 +153,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)  	while (1) {  		struct fsync_inode_entry *entry; -		err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); +		err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);  		if (err) -			goto out; +			return err;  		lock_page(page); @@ -184,9 +201,10 @@ next:  		/* check next segment */  		blkaddr = next_blkaddr_of_node(page);  	} +  	unlock_page(page); -out:  	__free_pages(page, 0); +  	return err;  } @@ -206,13 +224,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,  {  	struct seg_entry *sentry;  	unsigned int segno = GET_SEGNO(sbi, blkaddr); -	unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & -					(sbi->blocks_per_seg - 1); +	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); +	struct f2fs_summary_block *sum_node;  	struct f2fs_summary sum; +	struct page *sum_page, *node_page;  	nid_t ino, nid; -	void *kaddr;  	struct inode *inode; -	struct page *node_page;  	unsigned int offset;  	block_t bidx;  	int i; @@ -226,18 +243,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,  		struct curseg_info *curseg = CURSEG_I(sbi, i);  		if (curseg->segno == segno) {  			sum = curseg->sum_blk->entries[blkoff]; -			break; +			goto got_it;  		}  	} -	if (i > CURSEG_COLD_DATA) { -		struct page *sum_page = get_sum_page(sbi, segno); -		struct f2fs_summary_block *sum_node; -		kaddr = page_address(sum_page); -		sum_node = (struct f2fs_summary_block *)kaddr; -		sum = sum_node->entries[blkoff]; -		f2fs_put_page(sum_page, 1); -	} +	sum_page = get_sum_page(sbi, segno); +	sum_node = (struct f2fs_summary_block *)page_address(sum_page); +	sum = sum_node->entries[blkoff]; +	f2fs_put_page(sum_page, 1); +got_it:  	/* Use the locked dnode page and inode */  	nid = le32_to_cpu(sum.nid);  	if (dn->inode->i_ino == nid) { @@ -285,28 +299,31 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,  	struct f2fs_summary sum;  	struct node_info ni;  	int err = 0, recovered = 0; -	int ilock; + +	if (recover_inline_data(inode, page)) +		goto out; + +	if (recover_xattr_data(inode, page, blkaddr)) +		goto out;  	start = start_bidx_of_node(ofs_of_node(page), fi); -	if (IS_INODE(page)) -		end = start + ADDRS_PER_INODE(fi); -	else -		end = start + ADDRS_PER_BLOCK; +	end = start + ADDRS_PER_PAGE(page, fi); + +	f2fs_lock_op(sbi); -	ilock = mutex_lock_op(sbi);  	set_new_dnode(&dn, inode, NULL, NULL, 0);  	err = get_dnode_of_data(&dn, start, ALLOC_NODE);  	if (err) { -		mutex_unlock_op(sbi, ilock); -		return err; +		f2fs_unlock_op(sbi); +		goto out;  	} -	wait_on_page_writeback(dn.node_page); +	f2fs_wait_on_page_writeback(dn.node_page, NODE);  	get_node_info(sbi, dn.nid, &ni); -	BUG_ON(ni.ino != ino_of_node(page)); -	BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); +	f2fs_bug_on(ni.ino != ino_of_node(page)); +	f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));  	for (; start < end; start++) {  		block_t src, dest; @@ -316,9 +333,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,  		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {  			if (src == NULL_ADDR) { -				int err = reserve_new_block(&dn); +				err = reserve_new_block(&dn);  				/* We should not get -ENOSPC */ -				BUG_ON(err); +				f2fs_bug_on(err);  			}  			/* Check the previous node page having this index */ @@ -349,11 +366,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,  	recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);  err:  	f2fs_put_dnode(&dn); -	mutex_unlock_op(sbi, ilock); - -	f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " -			"recovered_data = %d blocks, err = %d", -			inode->i_ino, recovered, err); +	f2fs_unlock_op(sbi); +out: +	f2fs_msg(sbi->sb, KERN_NOTICE, +		"recover_data: ino = %lx, recovered = %d blocks, err = %d", +		inode->i_ino, recovered, err);  	return err;  } @@ -371,7 +388,7 @@ static int recover_data(struct f2fs_sb_info *sbi,  	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);  	/* read node page */ -	page = alloc_page(GFP_NOFS | __GFP_ZERO); +	page = alloc_page(GFP_F2FS_ZERO);  	if (!page)  		return -ENOMEM; @@ -380,9 +397,9 @@ static int recover_data(struct f2fs_sb_info *sbi,  	while (1) {  		struct fsync_inode_entry *entry; -		err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); +		err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);  		if (err) -			goto out; +			return err;  		lock_page(page); @@ -406,8 +423,8 @@ next:  		/* check next segment */  		blkaddr = next_blkaddr_of_node(page);  	} +  	unlock_page(page); -out:  	__free_pages(page, 0);  	if (!err) @@ -419,16 +436,17 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)  {  	struct list_head inode_list;  	int err; +	bool need_writecp = false;  	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", -			sizeof(struct fsync_inode_entry), NULL); -	if (unlikely(!fsync_entry_slab)) +			sizeof(struct fsync_inode_entry)); +	if (!fsync_entry_slab)  		return -ENOMEM;  	INIT_LIST_HEAD(&inode_list);  	/* step #1: find fsynced inode numbers */ -	sbi->por_doing = 1; +	sbi->por_doing = true;  	err = find_fsync_dnodes(sbi, &inode_list);  	if (err)  		goto out; @@ -436,14 +454,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)  	if (list_empty(&inode_list))  		goto out; +	need_writecp = true; +  	/* step #2: recover data */  	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); -	BUG_ON(!list_empty(&inode_list)); +	f2fs_bug_on(!list_empty(&inode_list));  out:  	destroy_fsync_dnodes(&inode_list);  	kmem_cache_destroy(fsync_entry_slab); -	sbi->por_doing = 0; -	if (!err) +	sbi->por_doing = false; +	if (!err && need_writecp)  		write_checkpoint(sbi, false);  	return err;  } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09af9c7b0f5..d04613df710 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -13,13 +13,165 @@  #include <linux/bio.h>  #include <linux/blkdev.h>  #include <linux/prefetch.h> +#include <linux/kthread.h>  #include <linux/vmalloc.h> +#include <linux/swap.h>  #include "f2fs.h"  #include "segment.h"  #include "node.h"  #include <trace/events/f2fs.h> +#define __reverse_ffz(x) __reverse_ffs(~(x)) + +static struct kmem_cache *discard_entry_slab; + +/* + * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since + * MSB and LSB are reversed in a byte by f2fs_set_bit. + */ +static inline unsigned long __reverse_ffs(unsigned long word) +{ +	int num = 0; + +#if BITS_PER_LONG == 64 +	if ((word & 0xffffffff) == 0) { +		num += 32; +		word >>= 32; +	} +#endif +	if ((word & 0xffff) == 0) { +		num += 16; +		word >>= 16; +	} +	if ((word & 0xff) == 0) { +		num += 8; +		word >>= 8; +	} +	if ((word & 0xf0) == 0) +		num += 4; +	else +		word >>= 4; +	if ((word & 0xc) == 0) +		num += 2; +	else +		word >>= 2; +	if ((word & 0x2) == 0) +		num += 1; +	return num; +} + +/* + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue + * f2fs_set_bit makes MSB and LSB reversed in a byte. + * Example: + *                             LSB <--> MSB + *   f2fs_set_bit(0, bitmap) => 0000 0001 + *   f2fs_set_bit(7, bitmap) => 1000 0000 + */ +static unsigned long __find_rev_next_bit(const unsigned long *addr, +			unsigned long size, unsigned long offset) +{ +	const unsigned long *p = addr + BIT_WORD(offset); +	unsigned long result = offset & ~(BITS_PER_LONG - 1); +	unsigned long tmp; +	unsigned long mask, submask; +	unsigned long quot, rest; + +	if (offset >= size) +		return size; + +	size -= result; +	offset %= BITS_PER_LONG; +	if (!offset) +		goto aligned; + +	tmp = *(p++); +	quot = (offset >> 3) << 3; +	rest = offset & 0x7; +	mask = ~0UL << quot; +	submask = (unsigned char)(0xff << rest) >> rest; +	submask <<= quot; +	mask &= submask; +	tmp &= mask; +	if (size < BITS_PER_LONG) +		goto found_first; +	if (tmp) +		goto found_middle; + +	size -= BITS_PER_LONG; +	result += BITS_PER_LONG; +aligned: +	while (size & ~(BITS_PER_LONG-1)) { +		tmp = *(p++); +		if (tmp) +			goto found_middle; +		result += BITS_PER_LONG; +		size -= BITS_PER_LONG; +	} +	if (!size) +		return result; +	tmp = *p; +found_first: +	tmp &= (~0UL >> (BITS_PER_LONG - size)); +	if (tmp == 0UL)		/* Are any bits set? */ +		return result + size;   /* Nope. */ +found_middle: +	return result + __reverse_ffs(tmp); +} + +static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, +			unsigned long size, unsigned long offset) +{ +	const unsigned long *p = addr + BIT_WORD(offset); +	unsigned long result = offset & ~(BITS_PER_LONG - 1); +	unsigned long tmp; +	unsigned long mask, submask; +	unsigned long quot, rest; + +	if (offset >= size) +		return size; + +	size -= result; +	offset %= BITS_PER_LONG; +	if (!offset) +		goto aligned; + +	tmp = *(p++); +	quot = (offset >> 3) << 3; +	rest = offset & 0x7; +	mask = ~(~0UL << quot); +	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest); +	submask <<= quot; +	mask += submask; +	tmp |= mask; +	if (size < BITS_PER_LONG) +		goto found_first; +	if (~tmp) +		goto found_middle; + +	size -= BITS_PER_LONG; +	result += BITS_PER_LONG; +aligned: +	while (size & ~(BITS_PER_LONG - 1)) { +		tmp = *(p++); +		if (~tmp) +			goto found_middle; +		result += BITS_PER_LONG; +		size -= BITS_PER_LONG; +	} +	if (!size) +		return result; +	tmp = *p; + +found_first: +	tmp |= ~0UL << size; +	if (tmp == ~0UL)        /* Are any bits zero? */ +		return result + size;   /* Nope. */ +found_middle: +	return result + __reverse_ffz(tmp); +} +  /*   * This function balances dirty node and dentry pages.   * In addition, it controls garbage collection. @@ -36,6 +188,114 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)  	}  } +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +{ +	/* check the # of cached NAT entries and prefree segments */ +	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || +				excess_prefree_segs(sbi)) +		f2fs_sync_fs(sbi->sb, true); +} + +static int issue_flush_thread(void *data) +{ +	struct f2fs_sb_info *sbi = data; +	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; +	wait_queue_head_t *q = &fcc->flush_wait_queue; +repeat: +	if (kthread_should_stop()) +		return 0; + +	spin_lock(&fcc->issue_lock); +	if (fcc->issue_list) { +		fcc->dispatch_list = fcc->issue_list; +		fcc->issue_list = fcc->issue_tail = NULL; +	} +	spin_unlock(&fcc->issue_lock); + +	if (fcc->dispatch_list) { +		struct bio *bio = bio_alloc(GFP_NOIO, 0); +		struct flush_cmd *cmd, *next; +		int ret; + +		bio->bi_bdev = sbi->sb->s_bdev; +		ret = submit_bio_wait(WRITE_FLUSH, bio); + +		for (cmd = fcc->dispatch_list; cmd; cmd = next) { +			cmd->ret = ret; +			next = cmd->next; +			complete(&cmd->wait); +		} +		bio_put(bio); +		fcc->dispatch_list = NULL; +	} + +	wait_event_interruptible(*q, +			kthread_should_stop() || fcc->issue_list); +	goto repeat; +} + +int f2fs_issue_flush(struct f2fs_sb_info *sbi) +{ +	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; +	struct flush_cmd cmd; + +	if (!test_opt(sbi, FLUSH_MERGE)) +		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); + +	init_completion(&cmd.wait); +	cmd.next = NULL; + +	spin_lock(&fcc->issue_lock); +	if (fcc->issue_list) +		fcc->issue_tail->next = &cmd; +	else +		fcc->issue_list = &cmd; +	fcc->issue_tail = &cmd; +	spin_unlock(&fcc->issue_lock); + +	if (!fcc->dispatch_list) +		wake_up(&fcc->flush_wait_queue); + +	wait_for_completion(&cmd.wait); + +	return cmd.ret; +} + +int create_flush_cmd_control(struct f2fs_sb_info *sbi) +{ +	dev_t dev = sbi->sb->s_bdev->bd_dev; +	struct flush_cmd_control *fcc; +	int err = 0; + +	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); +	if (!fcc) +		return -ENOMEM; +	spin_lock_init(&fcc->issue_lock); +	init_waitqueue_head(&fcc->flush_wait_queue); +	sbi->sm_info->cmd_control_info = fcc; +	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, +				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); +	if (IS_ERR(fcc->f2fs_issue_flush)) { +		err = PTR_ERR(fcc->f2fs_issue_flush); +		kfree(fcc); +		sbi->sm_info->cmd_control_info = NULL; +		return err; +	} + +	return err; +} + +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +{ +	struct flush_cmd_control *fcc = +				sbi->sm_info->cmd_control_info; + +	if (fcc && fcc->f2fs_issue_flush) +		kthread_stop(fcc->f2fs_issue_flush); +	kfree(fcc); +	sbi->sm_info->cmd_control_info = NULL; +} +  static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,  		enum dirty_type dirty_type)  { @@ -50,20 +310,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,  	if (dirty_type == DIRTY) {  		struct seg_entry *sentry = get_seg_entry(sbi, segno); -		enum dirty_type t = DIRTY_HOT_DATA; +		enum dirty_type t = sentry->type; -		dirty_type = sentry->type; - -		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) -			dirty_i->nr_dirty[dirty_type]++; - -		/* Only one bitmap should be set */ -		for (; t <= DIRTY_COLD_NODE; t++) { -			if (t == dirty_type) -				continue; -			if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) -				dirty_i->nr_dirty[t]--; -		} +		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) +			dirty_i->nr_dirty[t]++;  	}  } @@ -76,12 +326,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,  		dirty_i->nr_dirty[dirty_type]--;  	if (dirty_type == DIRTY) { -		enum dirty_type t = DIRTY_HOT_DATA; +		struct seg_entry *sentry = get_seg_entry(sbi, segno); +		enum dirty_type t = sentry->type; -		/* clear all the bitmaps */ -		for (; t <= DIRTY_COLD_NODE; t++) -			if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) -				dirty_i->nr_dirty[t]--; +		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) +			dirty_i->nr_dirty[t]--;  		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)  			clear_bit(GET_SECNO(sbi, segno), @@ -119,6 +368,69 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)  	mutex_unlock(&dirty_i->seglist_lock);  } +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, +				block_t blkstart, block_t blklen) +{ +	sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); +	sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); +	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); +	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + +void discard_next_dnode(struct f2fs_sb_info *sbi) +{ +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); +	block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + +	if (f2fs_issue_discard(sbi, blkaddr, 1)) { +		struct page *page = grab_meta_page(sbi, blkaddr); +		/* zero-filled page */ +		set_page_dirty(page); +		f2fs_put_page(page, 1); +	} +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, +			unsigned int segno, struct seg_entry *se) +{ +	struct list_head *head = &SM_I(sbi)->discard_list; +	struct discard_entry *new; +	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); +	int max_blocks = sbi->blocks_per_seg; +	unsigned long *cur_map = (unsigned long *)se->cur_valid_map; +	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; +	unsigned long dmap[entries]; +	unsigned int start = 0, end = -1; +	int i; + +	if (!test_opt(sbi, DISCARD)) +		return; + +	/* zero block will be discarded through the prefree list */ +	if (!se->valid_blocks || se->valid_blocks == max_blocks) +		return; + +	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ +	for (i = 0; i < entries; i++) +		dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + +	while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { +		start = __find_rev_next_bit(dmap, max_blocks, end + 1); +		if (start >= max_blocks) +			break; + +		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + +		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); +		INIT_LIST_HEAD(&new->list); +		new->blkaddr = START_BLOCK(sbi, segno) + start; +		new->len = end - start; + +		list_add_tail(&new->list, head); +		SM_I(sbi)->nr_discards += end - start; +	} +} +  /*   * Should call clear_prefree_segments after checkpoint is done.   */ @@ -141,30 +453,42 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)  void clear_prefree_segments(struct f2fs_sb_info *sbi)  { +	struct list_head *head = &(SM_I(sbi)->discard_list); +	struct discard_entry *entry, *this;  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); -	unsigned int segno = -1; +	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];  	unsigned int total_segs = TOTAL_SEGS(sbi); +	unsigned int start = 0, end = -1;  	mutex_lock(&dirty_i->seglist_lock); +  	while (1) { -		segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, -				segno + 1); -		if (segno >= total_segs) +		int i; +		start = find_next_bit(prefree_map, total_segs, end + 1); +		if (start >= total_segs)  			break; +		end = find_next_zero_bit(prefree_map, total_segs, start + 1); + +		for (i = start; i < end; i++) +			clear_bit(i, prefree_map); + +		dirty_i->nr_dirty[PRE] -= end - start; -		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) -			dirty_i->nr_dirty[PRE]--; - -		/* Let's use trim */ -		if (test_opt(sbi, DISCARD)) -			blkdev_issue_discard(sbi->sb->s_bdev, -					START_BLOCK(sbi, segno) << -					sbi->log_sectors_per_block, -					1 << (sbi->log_sectors_per_block + -						sbi->log_blocks_per_seg), -					GFP_NOFS, 0); +		if (!test_opt(sbi, DISCARD)) +			continue; + +		f2fs_issue_discard(sbi, START_BLOCK(sbi, start), +				(end - start) << sbi->log_blocks_per_seg);  	}  	mutex_unlock(&dirty_i->seglist_lock); + +	/* send small discards */ +	list_for_each_entry_safe(entry, this, head, list) { +		f2fs_issue_discard(sbi, entry->blkaddr, entry->len); +		list_del(&entry->list); +		SM_I(sbi)->nr_discards -= entry->len; +		kmem_cache_free(discard_entry_slab, entry); +	}  }  static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -193,9 +517,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)  	se = get_seg_entry(sbi, segno);  	new_vblocks = se->valid_blocks + del; -	offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); +	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); -	BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || +	f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||  				(new_vblocks > sbi->blocks_per_seg)));  	se->valid_blocks = new_vblocks; @@ -222,12 +546,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)  		get_sec_entry(sbi, segno)->valid_blocks += del;  } -static void refresh_sit_entry(struct f2fs_sb_info *sbi, -			block_t old_blkaddr, block_t new_blkaddr) +void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)  { -	update_sit_entry(sbi, new_blkaddr, 1); -	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) -		update_sit_entry(sbi, old_blkaddr, -1); +	update_sit_entry(sbi, new, 1); +	if (GET_SEGNO(sbi, old) != NULL_SEGNO) +		update_sit_entry(sbi, old, -1); + +	locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); +	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));  }  void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) @@ -235,7 +561,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)  	unsigned int segno = GET_SEGNO(sbi, addr);  	struct sit_info *sit_i = SIT_I(sbi); -	BUG_ON(addr == NULL_ADDR); +	f2fs_bug_on(addr == NULL_ADDR);  	if (addr == NEW_ADDR)  		return; @@ -267,9 +593,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,   */  int npages_for_summary_flush(struct f2fs_sb_info *sbi)  { -	int total_size_bytes = 0;  	int valid_sum_count = 0; -	int i, sum_space; +	int i, sum_in_page;  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {  		if (sbi->ckpt->alloc_type[i] == SSR) @@ -278,13 +603,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)  			valid_sum_count += curseg_blkoff(sbi, i);  	} -	total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) -			+ sizeof(struct nat_journal) + 2 -			+ sizeof(struct sit_journal) + 2; -	sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; -	if (total_size_bytes < sum_space) +	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - +			SUM_FOOTER_SIZE) / SUMMARY_SIZE; +	if (valid_sum_count <= sum_in_page)  		return 1; -	else if (total_size_bytes < 2 * sum_space) +	else if ((valid_sum_count - sum_in_page) <= +		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)  		return 2;  	return 3;  } @@ -350,7 +674,7 @@ find_other_zone:  		if (dir == ALLOC_RIGHT) {  			secno = find_next_zero_bit(free_i->free_secmap,  							TOTAL_SECS(sbi), 0); -			BUG_ON(secno >= TOTAL_SECS(sbi)); +			f2fs_bug_on(secno >= TOTAL_SECS(sbi));  		} else {  			go_left = 1;  			left_start = hint - 1; @@ -366,7 +690,7 @@ find_other_zone:  		}  		left_start = find_next_zero_bit(free_i->free_secmap,  							TOTAL_SECS(sbi), 0); -		BUG_ON(left_start >= TOTAL_SECS(sbi)); +		f2fs_bug_on(left_start >= TOTAL_SECS(sbi));  		break;  	}  	secno = left_start; @@ -405,7 +729,7 @@ skip_left:  	}  got_it:  	/* set it as dirty segment in free segmap */ -	BUG_ON(test_bit(segno, free_i->free_segmap)); +	f2fs_bug_on(test_bit(segno, free_i->free_segmap));  	__set_inuse(sbi, segno);  	*newseg = segno;  	write_unlock(&free_i->segmap_lock); @@ -458,13 +782,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,  			struct curseg_info *seg, block_t start)  {  	struct seg_entry *se = get_seg_entry(sbi, seg->segno); -	block_t ofs; -	for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { -		if (!f2fs_test_bit(ofs, se->ckpt_valid_map) -			&& !f2fs_test_bit(ofs, se->cur_valid_map)) -			break; -	} -	seg->next_blkoff = ofs; +	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); +	unsigned long target_map[entries]; +	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; +	unsigned long *cur_map = (unsigned long *)se->cur_valid_map; +	int i, pos; + +	for (i = 0; i < entries; i++) +		target_map[i] = ckpt_map[i] | cur_map[i]; + +	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + +	seg->next_blkoff = pos;  }  /* @@ -550,9 +879,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,  		change_curseg(sbi, type, true);  	else  		new_curseg(sbi, type, false); -#ifdef CONFIG_F2FS_STAT_FS -	sbi->segment_count[curseg->alloc_type]++; -#endif + +	stat_inc_seg_type(sbi, curseg);  }  void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -573,141 +901,6 @@ static const struct segment_allocation default_salloc_ops = {  	.allocate_segment = allocate_segment_by_default,  }; -static void f2fs_end_io_write(struct bio *bio, int err) -{ -	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); -	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; -	struct bio_private *p = bio->bi_private; - -	do { -		struct page *page = bvec->bv_page; - -		if (--bvec >= bio->bi_io_vec) -			prefetchw(&bvec->bv_page->flags); -		if (!uptodate) { -			SetPageError(page); -			if (page->mapping) -				set_bit(AS_EIO, &page->mapping->flags); -			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); -			p->sbi->sb->s_flags |= MS_RDONLY; -		} -		end_page_writeback(page); -		dec_page_count(p->sbi, F2FS_WRITEBACK); -	} while (bvec >= bio->bi_io_vec); - -	if (p->is_sync) -		complete(p->wait); -	kfree(p); -	bio_put(bio); -} - -struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) -{ -	struct bio *bio; - -	/* No failure on bio allocation */ -	bio = bio_alloc(GFP_NOIO, npages); -	bio->bi_bdev = bdev; -	bio->bi_private = NULL; - -	return bio; -} - -static void do_submit_bio(struct f2fs_sb_info *sbi, -				enum page_type type, bool sync) -{ -	int rw = sync ? WRITE_SYNC : WRITE; -	enum page_type btype = type > META ? META : type; - -	if (type >= META_FLUSH) -		rw = WRITE_FLUSH_FUA; - -	if (btype == META) -		rw |= REQ_META; - -	if (sbi->bio[btype]) { -		struct bio_private *p = sbi->bio[btype]->bi_private; -		p->sbi = sbi; -		sbi->bio[btype]->bi_end_io = f2fs_end_io_write; - -		trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); - -		if (type == META_FLUSH) { -			DECLARE_COMPLETION_ONSTACK(wait); -			p->is_sync = true; -			p->wait = &wait; -			submit_bio(rw, sbi->bio[btype]); -			wait_for_completion(&wait); -		} else { -			p->is_sync = false; -			submit_bio(rw, sbi->bio[btype]); -		} -		sbi->bio[btype] = NULL; -	} -} - -void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) -{ -	down_write(&sbi->bio_sem); -	do_submit_bio(sbi, type, sync); -	up_write(&sbi->bio_sem); -} - -static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, -				block_t blk_addr, enum page_type type) -{ -	struct block_device *bdev = sbi->sb->s_bdev; - -	verify_block_addr(sbi, blk_addr); - -	down_write(&sbi->bio_sem); - -	inc_page_count(sbi, F2FS_WRITEBACK); - -	if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) -		do_submit_bio(sbi, type, false); -alloc_new: -	if (sbi->bio[type] == NULL) { -		struct bio_private *priv; -retry: -		priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); -		if (!priv) { -			cond_resched(); -			goto retry; -		} - -		sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); -		sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); -		sbi->bio[type]->bi_private = priv; -		/* -		 * The end_io will be assigned at the sumbission phase. -		 * Until then, let bio_add_page() merge consecutive IOs as much -		 * as possible. -		 */ -	} - -	if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < -							PAGE_CACHE_SIZE) { -		do_submit_bio(sbi, type, false); -		goto alloc_new; -	} - -	sbi->last_block_in_bio[type] = blk_addr; - -	up_write(&sbi->bio_sem); -	trace_f2fs_submit_write_page(page, blk_addr, type); -} - -void f2fs_wait_on_page_writeback(struct page *page, -				enum page_type type, bool sync) -{ -	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); -	if (PageWriteback(page)) { -		f2fs_submit_bio(sbi, type, sync); -		wait_on_page_writeback(page); -	} -} -  static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)  {  	struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -771,20 +964,18 @@ static int __get_segment_type(struct page *page, enum page_type p_type)  		return __get_segment_type_4(page, p_type);  	}  	/* NR_CURSEG_TYPE(6) logs by default */ -	BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); +	f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);  	return __get_segment_type_6(page, p_type);  } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, -			block_t old_blkaddr, block_t *new_blkaddr, -			struct f2fs_summary *sum, enum page_type p_type) +void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +		block_t old_blkaddr, block_t *new_blkaddr, +		struct f2fs_summary *sum, int type)  {  	struct sit_info *sit_i = SIT_I(sbi);  	struct curseg_info *curseg;  	unsigned int old_cursegno; -	int type; -	type = __get_segment_type(page, p_type);  	curseg = CURSEG_I(sbi, type);  	mutex_lock(&curseg->curseg_mutex); @@ -801,66 +992,78 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,  	mutex_lock(&sit_i->sentry_lock);  	__refresh_next_blkoff(sbi, curseg); -#ifdef CONFIG_F2FS_STAT_FS -	sbi->block_count[curseg->alloc_type]++; -#endif +	stat_inc_block_count(sbi, curseg); + +	if (!__has_curseg_space(sbi, type)) +		sit_i->s_ops->allocate_segment(sbi, type, false);  	/*  	 * SIT information should be updated before segment allocation,  	 * since SSR needs latest valid block information.  	 */  	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - -	if (!__has_curseg_space(sbi, type)) -		sit_i->s_ops->allocate_segment(sbi, type, false); -  	locate_dirty_segment(sbi, old_cursegno); -	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); +  	mutex_unlock(&sit_i->sentry_lock); -	if (p_type == NODE) +	if (page && IS_NODESEG(type))  		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); -	/* writeout dirty page into bdev */ -	submit_write_page(sbi, page, *new_blkaddr, p_type); -  	mutex_unlock(&curseg->curseg_mutex);  } +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, +			block_t old_blkaddr, block_t *new_blkaddr, +			struct f2fs_summary *sum, struct f2fs_io_info *fio) +{ +	int type = __get_segment_type(page, fio->type); + +	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); + +	/* writeout dirty page into bdev */ +	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); +} +  void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)  { +	struct f2fs_io_info fio = { +		.type = META, +		.rw = WRITE_SYNC | REQ_META | REQ_PRIO +	}; +  	set_page_writeback(page); -	submit_write_page(sbi, page, page->index, META); +	f2fs_submit_page_mbio(sbi, page, page->index, &fio);  }  void write_node_page(struct f2fs_sb_info *sbi, struct page *page, +		struct f2fs_io_info *fio,  		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)  {  	struct f2fs_summary sum;  	set_summary(&sum, nid, 0, 0); -	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); +	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);  } -void write_data_page(struct inode *inode, struct page *page, -		struct dnode_of_data *dn, block_t old_blkaddr, -		block_t *new_blkaddr) +void write_data_page(struct page *page, struct dnode_of_data *dn, +		block_t *new_blkaddr, struct f2fs_io_info *fio)  { -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);  	struct f2fs_summary sum;  	struct node_info ni; -	BUG_ON(old_blkaddr == NULL_ADDR); +	f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);  	get_node_info(sbi, dn->nid, &ni);  	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); -	do_write_page(sbi, page, old_blkaddr, -			new_blkaddr, &sum, DATA); +	do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);  } -void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, -					block_t old_blk_addr) +void rewrite_data_page(struct page *page, block_t old_blkaddr, +					struct f2fs_io_info *fio)  { -	submit_write_page(sbi, page, old_blk_addr, DATA); +	struct inode *inode = page->mapping->host; +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);  }  void recover_data_page(struct f2fs_sb_info *sbi, @@ -896,14 +1099,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,  		change_curseg(sbi, type, true);  	} -	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & -					(sbi->blocks_per_seg - 1); +	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);  	__add_sum_entry(sbi, type, sum);  	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); -  	locate_dirty_segment(sbi, old_cursegno); -	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));  	mutex_unlock(&sit_i->sentry_lock);  	mutex_unlock(&curseg->curseg_mutex); @@ -919,6 +1119,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,  	unsigned int segno, old_cursegno;  	block_t next_blkaddr = next_blkaddr_of_node(page);  	unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); +	struct f2fs_io_info fio = { +		.type = NODE, +		.rw = WRITE_SYNC, +	};  	curseg = CURSEG_I(sbi, type); @@ -933,8 +1137,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,  		curseg->next_segno = segno;  		change_curseg(sbi, type, true);  	} -	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & -					(sbi->blocks_per_seg - 1); +	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);  	__add_sum_entry(sbi, type, sum);  	/* change the current log to the next block addr in advance */ @@ -942,22 +1145,54 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,  		curseg->next_segno = next_segno;  		change_curseg(sbi, type, true);  	} -	curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & -					(sbi->blocks_per_seg - 1); +	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);  	/* rewrite node page */  	set_page_writeback(page); -	submit_write_page(sbi, page, new_blkaddr, NODE); -	f2fs_submit_bio(sbi, NODE, true); +	f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); +	f2fs_submit_merged_bio(sbi, NODE, WRITE);  	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); -  	locate_dirty_segment(sbi, old_cursegno); -	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));  	mutex_unlock(&sit_i->sentry_lock);  	mutex_unlock(&curseg->curseg_mutex);  } +static inline bool is_merged_page(struct f2fs_sb_info *sbi, +					struct page *page, enum page_type type) +{ +	enum page_type btype = PAGE_TYPE_OF_BIO(type); +	struct f2fs_bio_info *io = &sbi->write_io[btype]; +	struct bio_vec *bvec; +	int i; + +	down_read(&io->io_rwsem); +	if (!io->bio) +		goto out; + +	bio_for_each_segment_all(bvec, io->bio, i) { +		if (page == bvec->bv_page) { +			up_read(&io->io_rwsem); +			return true; +		} +	} + +out: +	up_read(&io->io_rwsem); +	return false; +} + +void f2fs_wait_on_page_writeback(struct page *page, +				enum page_type type) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); +	if (PageWriteback(page)) { +		if (is_merged_page(sbi, page, type)) +			f2fs_submit_merged_bio(sbi, type, WRITE); +		wait_on_page_writeback(page); +	} +} +  static int read_compacted_summaries(struct f2fs_sb_info *sbi)  {  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1062,9 +1297,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)  				ns->ofs_in_node = 0;  			}  		} else { -			if (restore_node_summary(sbi, segno, sum)) { +			int err; + +			err = restore_node_summary(sbi, segno, sum); +			if (err) {  				f2fs_put_page(new, 1); -				return -EINVAL; +				return err;  			}  		}  	} @@ -1085,6 +1323,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)  static int restore_curseg_summaries(struct f2fs_sb_info *sbi)  {  	int type = CURSEG_HOT_DATA; +	int err;  	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {  		/* restore for compacted data summary */ @@ -1093,9 +1332,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)  		type = CURSEG_HOT_NODE;  	} -	for (; type <= CURSEG_COLD_NODE; type++) -		if (read_normal_summaries(sbi, type)) -			return -EINVAL; +	for (; type <= CURSEG_COLD_NODE; type++) { +		err = read_normal_summaries(sbi, type); +		if (err) +			return err; +	} +  	return 0;  } @@ -1122,8 +1364,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)  						SUM_JOURNAL_SIZE);  	written_size += SUM_JOURNAL_SIZE; -	set_page_dirty(page); -  	/* Step 3: write summary entries */  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {  		unsigned short blkoff; @@ -1142,18 +1382,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)  			summary = (struct f2fs_summary *)(kaddr + written_size);  			*summary = seg_i->sum_blk->entries[j];  			written_size += SUMMARY_SIZE; -			set_page_dirty(page);  			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -  							SUM_FOOTER_SIZE)  				continue; +			set_page_dirty(page);  			f2fs_put_page(page, 1);  			page = NULL;  		}  	} -	if (page) +	if (page) { +		set_page_dirty(page);  		f2fs_put_page(page, 1); +	}  }  static void write_normal_summaries(struct f2fs_sb_info *sbi, @@ -1239,7 +1481,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,  	/* get current sit block page without lock */  	src_page = get_meta_page(sbi, src_off);  	dst_page = grab_meta_page(sbi, dst_off); -	BUG_ON(PageDirty(src_page)); +	f2fs_bug_on(PageDirty(src_page));  	src_addr = page_address(src_page);  	dst_addr = page_address(dst_page); @@ -1271,9 +1513,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)  			__mark_sit_entry_dirty(sbi, segno);  		}  		update_sits_in_cursum(sum, -sits_in_cursum(sum)); -		return 1; +		return true;  	} -	return 0; +	return false;  }  /* @@ -1308,6 +1550,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)  		sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); +		/* add discard candidates */ +		if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) +			add_discard_addrs(sbi, segno, se); +  		if (flushed)  			goto to_sit_page; @@ -1479,36 +1725,48 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)  	struct sit_info *sit_i = SIT_I(sbi);  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);  	struct f2fs_summary_block *sum = curseg->sum_blk; -	unsigned int start; +	int sit_blk_cnt = SIT_BLK_CNT(sbi); +	unsigned int i, start, end; +	unsigned int readed, start_blk = 0; +	int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); -	for (start = 0; start < TOTAL_SEGS(sbi); start++) { -		struct seg_entry *se = &sit_i->sentries[start]; -		struct f2fs_sit_block *sit_blk; -		struct f2fs_sit_entry sit; -		struct page *page; -		int i; - -		mutex_lock(&curseg->curseg_mutex); -		for (i = 0; i < sits_in_cursum(sum); i++) { -			if (le32_to_cpu(segno_in_journal(sum, i)) == start) { -				sit = sit_in_journal(sum, i); -				mutex_unlock(&curseg->curseg_mutex); -				goto got_it; +	do { +		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); + +		start = start_blk * sit_i->sents_per_block; +		end = (start_blk + readed) * sit_i->sents_per_block; + +		for (; start < end && start < TOTAL_SEGS(sbi); start++) { +			struct seg_entry *se = &sit_i->sentries[start]; +			struct f2fs_sit_block *sit_blk; +			struct f2fs_sit_entry sit; +			struct page *page; + +			mutex_lock(&curseg->curseg_mutex); +			for (i = 0; i < sits_in_cursum(sum); i++) { +				if (le32_to_cpu(segno_in_journal(sum, i)) +								== start) { +					sit = sit_in_journal(sum, i); +					mutex_unlock(&curseg->curseg_mutex); +					goto got_it; +				}  			} -		} -		mutex_unlock(&curseg->curseg_mutex); -		page = get_current_sit_page(sbi, start); -		sit_blk = (struct f2fs_sit_block *)page_address(page); -		sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; -		f2fs_put_page(page, 1); +			mutex_unlock(&curseg->curseg_mutex); + +			page = get_current_sit_page(sbi, start); +			sit_blk = (struct f2fs_sit_block *)page_address(page); +			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; +			f2fs_put_page(page, 1);  got_it: -		check_block_count(sbi, start, &sit); -		seg_info_from_raw_sit(se, &sit); -		if (sbi->segs_per_sec > 1) { -			struct sec_entry *e = get_sec_entry(sbi, start); -			e->valid_blocks += se->valid_blocks; +			check_block_count(sbi, start, &sit); +			seg_info_from_raw_sit(se, &sit); +			if (sbi->segs_per_sec > 1) { +				struct sec_entry *e = get_sec_entry(sbi, start); +				e->valid_blocks += se->valid_blocks; +			}  		} -	} +		start_blk += readed; +	} while (start_blk < sit_blk_cnt);  }  static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -1628,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)  	/* init sm info */  	sbi->sm_info = sm_info; -	INIT_LIST_HEAD(&sm_info->wblist_head); -	spin_lock_init(&sm_info->wblist_lock);  	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);  	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);  	sm_info->segment_count = le32_to_cpu(raw_super->segment_count); @@ -1637,6 +1893,20 @@ int build_segment_manager(struct f2fs_sb_info *sbi)  	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);  	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);  	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); +	sm_info->rec_prefree_segments = sm_info->main_segments * +					DEF_RECLAIM_PREFREE_SEGMENTS / 100; +	sm_info->ipu_policy = F2FS_IPU_DISABLE; +	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; + +	INIT_LIST_HEAD(&sm_info->discard_list); +	sm_info->nr_discards = 0; +	sm_info->max_discards = 0; + +	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { +		err = create_flush_cmd_control(sbi); +		if (err) +			return err; +	}  	err = build_sit_info(sbi);  	if (err) @@ -1744,6 +2014,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)  void destroy_segment_manager(struct f2fs_sb_info *sbi)  {  	struct f2fs_sm_info *sm_info = SM_I(sbi); + +	if (!sm_info) +		return; +	destroy_flush_cmd_control(sbi);  	destroy_dirty_segmap(sbi);  	destroy_curseg(sbi);  	destroy_free_segmap(sbi); @@ -1751,3 +2025,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)  	sbi->sm_info = NULL;  	kfree(sm_info);  } + +int __init create_segment_manager_caches(void) +{ +	discard_entry_slab = f2fs_kmem_cache_create("discard_entry", +			sizeof(struct discard_entry)); +	if (!discard_entry_slab) +		return -ENOMEM; +	return 0; +} + +void destroy_segment_manager_caches(void) +{ +	kmem_cache_destroy(discard_entry_slab); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bdd10eab8c4..7091204680f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -14,17 +14,14 @@  #define NULL_SEGNO			((unsigned int)(~0))  #define NULL_SECNO			((unsigned int)(~0)) +#define DEF_RECLAIM_PREFREE_SEGMENTS	5	/* 5% over total segments */ +  /* L: Logical segment # in volume, R: Relative segment # in main area */  #define GET_L2R_SEGNO(free_i, segno)	(segno - free_i->start_segno)  #define GET_R2L_SEGNO(free_i, segno)	(segno + free_i->start_segno) -#define IS_DATASEG(t)							\ -	((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) ||		\ -	(t == CURSEG_WARM_DATA)) - -#define IS_NODESEG(t)							\ -	((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) ||		\ -	(t == CURSEG_WARM_NODE)) +#define IS_DATASEG(t)	(t <= CURSEG_COLD_DATA) +#define IS_NODESEG(t)	(t >= CURSEG_HOT_NODE)  #define IS_CURSEG(sbi, seg)						\  	((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) ||	\ @@ -60,6 +57,9 @@  	((blk_addr) - SM_I(sbi)->seg0_blkaddr)  #define GET_SEGNO_FROM_SEG0(sbi, blk_addr)				\  	(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr)				\ +	(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) +  #define GET_SEGNO(sbi, blk_addr)					\  	(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ?		\  	NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),			\ @@ -81,22 +81,19 @@  	(segno / SIT_ENTRY_PER_BLOCK)  #define	START_SEGNO(sit_i, segno)		\  	(SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define SIT_BLK_CNT(sbi)			\ +	((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)  #define f2fs_bitmap_size(nr)			\  	(BITS_TO_LONGS(nr) * sizeof(unsigned long))  #define TOTAL_SEGS(sbi)	(SM_I(sbi)->main_segments)  #define TOTAL_SECS(sbi)	(sbi->total_sections)  #define SECTOR_FROM_BLOCK(sbi, blk_addr)				\ -	(blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +	(((sector_t)blk_addr) << (sbi)->log_sectors_per_block)  #define SECTOR_TO_BLOCK(sbi, sectors)					\ -	(sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) - -/* during checkpoint, bio_private is used to synchronize the last bio */ -struct bio_private { -	struct f2fs_sb_info *sbi; -	bool is_sync; -	void *wait; -}; +	(sectors >> (sbi)->log_sectors_per_block) +#define MAX_BIO_BLOCKS(max_hw_blocks)					\ +	(min((int)max_hw_blocks, BIO_MAX_PAGES))  /*   * indicate a block allocation direction: RIGHT and LEFT. @@ -383,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,  static inline block_t written_block_count(struct f2fs_sb_info *sbi)  { -	struct sit_info *sit_i = SIT_I(sbi); -	block_t vblocks; - -	mutex_lock(&sit_i->sentry_lock); -	vblocks = sit_i->written_valid_blocks; -	mutex_unlock(&sit_i->sentry_lock); - -	return vblocks; +	return SIT_I(sbi)->written_valid_blocks;  }  static inline unsigned int free_segments(struct f2fs_sb_info *sbi)  { -	struct free_segmap_info *free_i = FREE_I(sbi); -	unsigned int free_segs; - -	read_lock(&free_i->segmap_lock); -	free_segs = free_i->free_segments; -	read_unlock(&free_i->segmap_lock); - -	return free_segs; +	return FREE_I(sbi)->free_segments;  }  static inline int reserved_segments(struct f2fs_sb_info *sbi) @@ -412,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)  static inline unsigned int free_sections(struct f2fs_sb_info *sbi)  { -	struct free_segmap_info *free_i = FREE_I(sbi); -	unsigned int free_secs; - -	read_lock(&free_i->segmap_lock); -	free_secs = free_i->free_sections; -	read_unlock(&free_i->segmap_lock); - -	return free_secs; +	return FREE_I(sbi)->free_sections;  }  static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) @@ -454,8 +430,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)  static inline bool need_SSR(struct f2fs_sb_info *sbi)  { -	return ((prefree_segments(sbi) / sbi->segs_per_sec) -			+ free_sections(sbi) < overprovision_sections(sbi)); +	return (prefree_segments(sbi) / sbi->segs_per_sec) +			+ free_sections(sbi) < overprovision_sections(sbi);  }  static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) @@ -463,33 +439,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)  	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);  	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); -	if (sbi->por_doing) +	if (unlikely(sbi->por_doing))  		return false; -	return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + -						reserved_sections(sbi))); +	return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + +						reserved_sections(sbi)); +} + +static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) +{ +	return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;  }  static inline int utilization(struct f2fs_sb_info *sbi)  { -	return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); +	return div_u64((u64)valid_user_blocks(sbi) * 100, +					sbi->user_block_count);  }  /*   * Sometimes f2fs may be better to drop out-of-place update policy. - * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write - * data in the original place likewise other traditional file systems. - * But, currently set 100 in percentage, which means it is disabled. - * See below need_inplace_update(). + * And, users can control the policy through sysfs entries. + * There are five policies with triggering conditions as follows. + * F2FS_IPU_FORCE - all the time, + * F2FS_IPU_SSR - if SSR mode is activated, + * F2FS_IPU_UTIL - if FS utilization is over threashold, + * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over + *                     threashold, + * F2FS_IPUT_DISABLE - disable IPU. (=default option)   */ -#define MIN_IPU_UTIL		100 +#define DEF_MIN_IPU_UTIL	70 + +enum { +	F2FS_IPU_FORCE, +	F2FS_IPU_SSR, +	F2FS_IPU_UTIL, +	F2FS_IPU_SSR_UTIL, +	F2FS_IPU_DISABLE, +}; +  static inline bool need_inplace_update(struct inode *inode)  {  	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + +	/* IPU can be done only for the user data */  	if (S_ISDIR(inode->i_mode))  		return false; -	if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + +	switch (SM_I(sbi)->ipu_policy) { +	case F2FS_IPU_FORCE:  		return true; +	case F2FS_IPU_SSR: +		if (need_SSR(sbi)) +			return true; +		break; +	case F2FS_IPU_UTIL: +		if (utilization(sbi) > SM_I(sbi)->min_ipu_util) +			return true; +		break; +	case F2FS_IPU_SSR_UTIL: +		if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) +			return true; +		break; +	case F2FS_IPU_DISABLE: +		break; +	}  	return false;  } @@ -513,16 +527,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)  	return curseg->next_blkoff;  } +#ifdef CONFIG_F2FS_CHECK_FS  static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)  {  	unsigned int end_segno = SM_I(sbi)->segment_count - 1;  	BUG_ON(segno > end_segno);  } -/* - * This function is used for only debugging. - * NOTE: In future, we have to remove this function. - */  static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)  {  	struct f2fs_sm_info *sm_info = SM_I(sbi); @@ -541,8 +552,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,  {  	struct f2fs_sm_info *sm_info = SM_I(sbi);  	unsigned int end_segno = sm_info->segment_count - 1; +	bool is_valid  = test_bit_le(0, raw_sit->valid_map) ? true : false;  	int valid_blocks = 0; -	int i; +	int cur_pos = 0, next_pos;  	/* check segment usage */  	BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); @@ -551,11 +563,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,  	BUG_ON(segno > end_segno);  	/* check bitmap with valid block count */ -	for (i = 0; i < sbi->blocks_per_seg; i++) -		if (f2fs_test_bit(i, raw_sit->valid_map)) -			valid_blocks++; +	do { +		if (is_valid) { +			next_pos = find_next_zero_bit_le(&raw_sit->valid_map, +					sbi->blocks_per_seg, +					cur_pos); +			valid_blocks += next_pos - cur_pos; +		} else +			next_pos = find_next_bit_le(&raw_sit->valid_map, +					sbi->blocks_per_seg, +					cur_pos); +		cur_pos = next_pos; +		is_valid = !is_valid; +	} while (cur_pos < sbi->blocks_per_seg);  	BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);  } +#else +#define check_seg_range(sbi, segno) +#define verify_block_addr(sbi, blk_addr) +#define check_block_count(sbi, segno, raw_sit) +#endif  static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,  						unsigned int start) @@ -637,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)  	struct request_queue *q = bdev_get_queue(bdev);  	return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));  } + +/* + * It is very important to gather dirty pages and write at once, so that we can + * submit a big bio without interfering other data writes. + * By default, 512 pages for directory data, + * 512 pages (2MB) * 3 for three types of nodes, and + * max_bio_blocks for meta are set. + */ +static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) +{ +	if (type == DATA) +		return sbi->blocks_per_seg; +	else if (type == NODE) +		return 3 * sbi->blocks_per_seg; +	else if (type == META) +		return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); +	else +		return 0; +} + +/* + * When writing pages, it'd better align nr_to_write for segment size. + */ +static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, +					struct writeback_control *wbc) +{ +	long nr_to_write, desired; + +	if (wbc->sync_mode != WB_SYNC_NONE) +		return 0; + +	nr_to_write = wbc->nr_to_write; + +	if (type == DATA) +		desired = 4096; +	else if (type == NODE) +		desired = 3 * max_hw_blocks(sbi); +	else +		desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + +	wbc->nr_to_write = desired; +	return desired - nr_to_write; +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13d0a0fe49d..8f96d9372ad 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -43,11 +43,15 @@ enum {  	Opt_disable_roll_forward,  	Opt_discard,  	Opt_noheap, +	Opt_user_xattr,  	Opt_nouser_xattr, +	Opt_acl,  	Opt_noacl,  	Opt_active_logs,  	Opt_disable_ext_identify,  	Opt_inline_xattr, +	Opt_inline_data, +	Opt_flush_merge,  	Opt_err,  }; @@ -56,33 +60,59 @@ static match_table_t f2fs_tokens = {  	{Opt_disable_roll_forward, "disable_roll_forward"},  	{Opt_discard, "discard"},  	{Opt_noheap, "no_heap"}, +	{Opt_user_xattr, "user_xattr"},  	{Opt_nouser_xattr, "nouser_xattr"}, +	{Opt_acl, "acl"},  	{Opt_noacl, "noacl"},  	{Opt_active_logs, "active_logs=%u"},  	{Opt_disable_ext_identify, "disable_ext_identify"},  	{Opt_inline_xattr, "inline_xattr"}, +	{Opt_inline_data, "inline_data"}, +	{Opt_flush_merge, "flush_merge"},  	{Opt_err, NULL},  };  /* Sysfs support for f2fs */ +enum { +	GC_THREAD,	/* struct f2fs_gc_thread */ +	SM_INFO,	/* struct f2fs_sm_info */ +	NM_INFO,	/* struct f2fs_nm_info */ +	F2FS_SBI,	/* struct f2fs_sb_info */ +}; +  struct f2fs_attr {  	struct attribute attr;  	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);  	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,  			 const char *, size_t); +	int struct_type;  	int offset;  }; +static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) +{ +	if (struct_type == GC_THREAD) +		return (unsigned char *)sbi->gc_thread; +	else if (struct_type == SM_INFO) +		return (unsigned char *)SM_I(sbi); +	else if (struct_type == NM_INFO) +		return (unsigned char *)NM_I(sbi); +	else if (struct_type == F2FS_SBI) +		return (unsigned char *)sbi; +	return NULL; +} +  static ssize_t f2fs_sbi_show(struct f2fs_attr *a,  			struct f2fs_sb_info *sbi, char *buf)  { -	struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; +	unsigned char *ptr = NULL;  	unsigned int *ui; -	if (!gc_kth) +	ptr = __struct_ptr(sbi, a->struct_type); +	if (!ptr)  		return -EINVAL; -	ui = (unsigned int *)(((char *)gc_kth) + a->offset); +	ui = (unsigned int *)(ptr + a->offset);  	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);  } @@ -91,15 +121,16 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,  			struct f2fs_sb_info *sbi,  			const char *buf, size_t count)  { -	struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; +	unsigned char *ptr;  	unsigned long t;  	unsigned int *ui;  	ssize_t ret; -	if (!gc_kth) +	ptr = __struct_ptr(sbi, a->struct_type); +	if (!ptr)  		return -EINVAL; -	ui = (unsigned int *)(((char *)gc_kth) + a->offset); +	ui = (unsigned int *)(ptr + a->offset);  	ret = kstrtoul(skip_spaces(buf), 0, &t);  	if (ret < 0) @@ -135,21 +166,31 @@ static void f2fs_sb_release(struct kobject *kobj)  	complete(&sbi->s_kobj_unregister);  } -#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \  static struct f2fs_attr f2fs_attr_##_name = {			\  	.attr = {.name = __stringify(_name), .mode = _mode },	\  	.show	= _show,					\  	.store	= _store,					\ -	.offset = offsetof(struct f2fs_gc_kthread, _elname),	\ +	.struct_type = _struct_type,				\ +	.offset = _offset					\  } -#define F2FS_RW_ATTR(name, elname)	\ -	F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) - -F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(gc_idle, gc_idle); +#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\ +	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\ +		f2fs_sbi_show, f2fs_sbi_store,			\ +		offsetof(struct struct_name, elname)) + +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);  #define ATTR_LIST(name) (&f2fs_attr_##name.attr)  static struct attribute *f2fs_attrs[] = { @@ -157,6 +198,13 @@ static struct attribute *f2fs_attrs[] = {  	ATTR_LIST(gc_max_sleep_time),  	ATTR_LIST(gc_no_gc_sleep_time),  	ATTR_LIST(gc_idle), +	ATTR_LIST(reclaim_segments), +	ATTR_LIST(max_small_discards), +	ATTR_LIST(ipu_policy), +	ATTR_LIST(min_ipu_util), +	ATTR_LIST(max_victim_search), +	ATTR_LIST(dir_level), +	ATTR_LIST(ram_thresh),  	NULL,  }; @@ -217,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)  			if (!name)  				return -ENOMEM; -			if (!strncmp(name, "on", 2)) +			if (strlen(name) == 2 && !strncmp(name, "on", 2))  				set_opt(sbi, BG_GC); -			else if (!strncmp(name, "off", 3)) +			else if (strlen(name) == 3 && !strncmp(name, "off", 3))  				clear_opt(sbi, BG_GC);  			else {  				kfree(name); @@ -237,6 +285,9 @@ static int parse_options(struct super_block *sb, char *options)  			set_opt(sbi, NOHEAP);  			break;  #ifdef CONFIG_F2FS_FS_XATTR +		case Opt_user_xattr: +			set_opt(sbi, XATTR_USER); +			break;  		case Opt_nouser_xattr:  			clear_opt(sbi, XATTR_USER);  			break; @@ -244,6 +295,10 @@ static int parse_options(struct super_block *sb, char *options)  			set_opt(sbi, INLINE_XATTR);  			break;  #else +		case Opt_user_xattr: +			f2fs_msg(sb, KERN_INFO, +				"user_xattr options not supported"); +			break;  		case Opt_nouser_xattr:  			f2fs_msg(sb, KERN_INFO,  				"nouser_xattr options not supported"); @@ -254,10 +309,16 @@ static int parse_options(struct super_block *sb, char *options)  			break;  #endif  #ifdef CONFIG_F2FS_FS_POSIX_ACL +		case Opt_acl: +			set_opt(sbi, POSIX_ACL); +			break;  		case Opt_noacl:  			clear_opt(sbi, POSIX_ACL);  			break;  #else +		case Opt_acl: +			f2fs_msg(sb, KERN_INFO, "acl options not supported"); +			break;  		case Opt_noacl:  			f2fs_msg(sb, KERN_INFO, "noacl options not supported");  			break; @@ -272,6 +333,12 @@ static int parse_options(struct super_block *sb, char *options)  		case Opt_disable_ext_identify:  			set_opt(sbi, DISABLE_EXT_IDENTIFY);  			break; +		case Opt_inline_data: +			set_opt(sbi, INLINE_DATA); +			break; +		case Opt_flush_merge: +			set_opt(sbi, FLUSH_MERGE); +			break;  		default:  			f2fs_msg(sb, KERN_ERR,  				"Unrecognized mount option \"%s\" or missing value", @@ -286,7 +353,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)  {  	struct f2fs_inode_info *fi; -	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); +	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);  	if (!fi)  		return NULL; @@ -298,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)  	fi->i_current_depth = 1;  	fi->i_advise = 0;  	rwlock_init(&fi->ext.ext_lock); +	init_rwsem(&fi->i_sem);  	set_inode_flag(fi, FI_NEW_INODE);  	if (test_opt(F2FS_SB(sb), INLINE_XATTR))  		set_inode_flag(fi, FI_INLINE_XATTR); +	/* Will be used by directory only */ +	fi->i_dir_level = F2FS_SB(sb)->dir_level; +  	return &fi->vfs_inode;  } @@ -355,7 +426,9 @@ static void f2fs_put_super(struct super_block *sb)  	f2fs_destroy_stats(sbi);  	stop_gc_thread(sbi); -	write_checkpoint(sbi, true); +	/* We don't need to do checkpoint when it's clean */ +	if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) +		write_checkpoint(sbi, true);  	iput(sbi->node_inode);  	iput(sbi->meta_inode); @@ -441,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)  {  	struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); -	if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) +	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))  		seq_printf(seq, ",background_gc=%s", "on");  	else  		seq_printf(seq, ",background_gc=%s", "off"); @@ -467,7 +540,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)  #endif  	if (test_opt(sbi, DISABLE_EXT_IDENTIFY))  		seq_puts(seq, ",disable_ext_identify"); - +	if (test_opt(sbi, INLINE_DATA)) +		seq_puts(seq, ",inline_data"); +	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) +		seq_puts(seq, ",flush_merge");  	seq_printf(seq, ",active_logs=%u", sbi->active_logs);  	return 0; @@ -477,16 +553,26 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)  {  	struct super_block *sb = seq->private;  	struct f2fs_sb_info *sbi = F2FS_SB(sb); -	unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); +	unsigned int total_segs = +			le32_to_cpu(sbi->raw_super->segment_count_main);  	int i; +	seq_puts(seq, "format: segment_type|valid_blocks\n" +		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); +  	for (i = 0; i < total_segs; i++) { -		seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); -		if (i != 0 && (i % 10) == 0) -			seq_puts(seq, "\n"); +		struct seg_entry *se = get_seg_entry(sbi, i); + +		if ((i % 10) == 0) +			seq_printf(seq, "%-5d", i); +		seq_printf(seq, "%d|%-3u", se->type, +					get_valid_blocks(sbi, i, 1)); +		if ((i % 10) == 9 || i == (total_segs - 1)) +			seq_putc(seq, '\n');  		else -			seq_puts(seq, " "); +			seq_putc(seq, ' ');  	} +  	return 0;  } @@ -508,6 +594,10 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	struct f2fs_sb_info *sbi = F2FS_SB(sb);  	struct f2fs_mount_info org_mount_opt;  	int err, active_logs; +	bool need_restart_gc = false; +	bool need_stop_gc = false; + +	sync_filesystem(sb);  	/*  	 * Save the old mount options in case we @@ -523,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	/*  	 * Previous and new state of filesystem is RO, -	 * so no point in checking GC conditions. +	 * so skip checking GC and FLUSH_MERGE conditions.  	 */  	if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))  		goto skip; @@ -537,18 +627,40 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  		if (sbi->gc_thread) {  			stop_gc_thread(sbi);  			f2fs_sync_fs(sb, 1); +			need_restart_gc = true;  		}  	} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {  		err = start_gc_thread(sbi);  		if (err)  			goto restore_opts; +		need_stop_gc = true; +	} + +	/* +	 * We stop issue flush thread if FS is mounted as RO +	 * or if flush_merge is not passed in mount option. +	 */ +	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { +		destroy_flush_cmd_control(sbi); +	} else if (test_opt(sbi, FLUSH_MERGE) && +					!sbi->sm_info->cmd_control_info) { +		err = create_flush_cmd_control(sbi); +		if (err) +			goto restore_gc;  	}  skip:  	/* Update the POSIXACL Flag */  	 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |  		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);  	return 0; - +restore_gc: +	if (need_restart_gc) { +		if (start_gc_thread(sbi)) +			f2fs_msg(sbi->sb, KERN_WARNING, +				"background gc thread is stop"); +	} else if (need_stop_gc) { +		stop_gc_thread(sbi); +	}  restore_opts:  	sbi->mount_opt = org_mount_opt;  	sbi->active_logs = active_logs; @@ -577,7 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,  	struct f2fs_sb_info *sbi = F2FS_SB(sb);  	struct inode *inode; -	if (ino < F2FS_ROOT_INO(sbi)) +	if (check_nid_range(sbi, ino))  		return ERR_PTR(-ESTALE);  	/* @@ -588,7 +700,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,  	inode = f2fs_iget(sb, ino);  	if (IS_ERR(inode))  		return ERR_CAST(inode); -	if (generation && inode->i_generation != generation) { +	if (unlikely(generation && inode->i_generation != generation)) {  		/* we didn't find the right inode.. */  		iput(inode);  		return ERR_PTR(-ESTALE); @@ -691,10 +803,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)  	fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);  	fsmeta += le32_to_cpu(raw_super->segment_count_ssa); -	if (fsmeta >= total) +	if (unlikely(fsmeta >= total))  		return 1; -	if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { +	if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {  		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");  		return 1;  	} @@ -722,35 +834,56 @@ static void init_sb_info(struct f2fs_sb_info *sbi)  	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);  	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);  	sbi->cur_victim_sec = NULL_SECNO; +	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;  	for (i = 0; i < NR_COUNT_TYPE; i++)  		atomic_set(&sbi->nr_pages[i], 0); + +	sbi->dir_level = DEF_DIR_LEVEL;  } -static int validate_superblock(struct super_block *sb, -		struct f2fs_super_block **raw_super, -		struct buffer_head **raw_super_buf, sector_t block) +/* + * Read f2fs raw super block. + * Because we have two copies of super block, so read the first one at first, + * if the first one is invalid, move to read the second one. + */ +static int read_raw_super_block(struct super_block *sb, +			struct f2fs_super_block **raw_super, +			struct buffer_head **raw_super_buf)  { -	const char *super = (block == 0 ? "first" : "second"); +	int block = 0; -	/* read f2fs raw super block */ +retry:  	*raw_super_buf = sb_bread(sb, block);  	if (!*raw_super_buf) { -		f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", -				super); -		return -EIO; +		f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", +				block + 1); +		if (block == 0) { +			block++; +			goto retry; +		} else { +			return -EIO; +		}  	}  	*raw_super = (struct f2fs_super_block *)  		((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);  	/* sanity checking of raw super */ -	if (!sanity_check_raw_super(sb, *raw_super)) -		return 0; +	if (sanity_check_raw_super(sb, *raw_super)) { +		brelse(*raw_super_buf); +		f2fs_msg(sb, KERN_ERR, +			"Can't find valid F2FS filesystem in %dth superblock", +								block + 1); +		if (block == 0) { +			block++; +			goto retry; +		} else { +			return -EINVAL; +		} +	} -	f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " -				"in %s superblock", super); -	return -EINVAL; +	return 0;  }  static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -768,19 +901,15 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  		return -ENOMEM;  	/* set a block size */ -	if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { +	if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {  		f2fs_msg(sb, KERN_ERR, "unable to set blocksize");  		goto free_sbi;  	} -	err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); -	if (err) { -		brelse(raw_super_buf); -		/* check secondary superblock when primary failed */ -		err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); -		if (err) -			goto free_sb_buf; -	} +	err = read_raw_super_block(sb, &raw_super, &raw_super_buf); +	if (err) +		goto free_sbi; +  	sb->s_fs_info = sbi;  	/* init some FS parameters */  	sbi->active_logs = NR_CURSEG_TYPE; @@ -818,12 +947,21 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	mutex_init(&sbi->gc_mutex);  	mutex_init(&sbi->writepages);  	mutex_init(&sbi->cp_mutex); -	for (i = 0; i < NR_GLOBAL_LOCKS; i++) -		mutex_init(&sbi->fs_lock[i]);  	mutex_init(&sbi->node_write); -	sbi->por_doing = 0; +	sbi->por_doing = false;  	spin_lock_init(&sbi->stat_lock); -	init_rwsem(&sbi->bio_sem); + +	init_rwsem(&sbi->read_io.io_rwsem); +	sbi->read_io.sbi = sbi; +	sbi->read_io.bio = NULL; +	for (i = 0; i < NR_PAGE_TYPE; i++) { +		init_rwsem(&sbi->write_io[i].io_rwsem); +		sbi->write_io[i].sbi = sbi; +		sbi->write_io[i].bio = NULL; +	} + +	init_rwsem(&sbi->cp_rwsem); +	init_waitqueue_head(&sbi->cp_wait);  	init_sb_info(sbi);  	/* get an inode for meta space */ @@ -886,9 +1024,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	}  	/* if there are nt orphan nodes free them */ -	err = -EINVAL; -	if (recover_orphan_inodes(sbi)) -		goto free_node_inode; +	recover_orphan_inodes(sbi);  	/* read root inode and dentry */  	root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); @@ -897,8 +1033,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  		err = PTR_ERR(root);  		goto free_node_inode;  	} -	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) +	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { +		err = -EINVAL;  		goto free_root_inode; +	}  	sb->s_root = d_make_root(root); /* allocate root dentry */  	if (!sb->s_root) { @@ -906,28 +1044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  		goto free_root_inode;  	} -	/* recover fsynced data */ -	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { -		err = recover_fsync_data(sbi); -		if (err) -			f2fs_msg(sb, KERN_ERR, -				"Cannot recover all fsync data errno=%ld", err); -	} - -	/* -	 * If filesystem is not mounted as read-only then -	 * do start the gc_thread. -	 */ -	if (!(sb->s_flags & MS_RDONLY)) { -		/* After POR, we can run background GC thread.*/ -		err = start_gc_thread(sbi); -		if (err) -			goto fail; -	} -  	err = f2fs_build_stats(sbi);  	if (err) -		goto fail; +		goto free_root_inode;  	if (f2fs_proc_root)  		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -949,11 +1068,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)  	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,  							"%s", sb->s_id);  	if (err) -		goto fail; +		goto free_proc; +	/* recover fsynced data */ +	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { +		err = recover_fsync_data(sbi); +		if (err) +			f2fs_msg(sb, KERN_ERR, +				"Cannot recover all fsync data errno=%ld", err); +	} + +	/* +	 * If filesystem is not mounted as read-only then +	 * do start the gc_thread. +	 */ +	if (!(sb->s_flags & MS_RDONLY)) { +		/* After POR, we can run background GC thread.*/ +		err = start_gc_thread(sbi); +		if (err) +			goto free_kobj; +	}  	return 0; -fail: -	stop_gc_thread(sbi); + +free_kobj: +	kobject_del(&sbi->s_kobj); +free_proc: +	if (sbi->s_proc) { +		remove_proc_entry("segment_info", sbi->s_proc); +		remove_proc_entry(sb->s_id, f2fs_proc_root); +	} +	f2fs_destroy_stats(sbi);  free_root_inode:  	dput(sb->s_root);  	sb->s_root = NULL; @@ -993,8 +1137,8 @@ MODULE_ALIAS_FS("f2fs");  static int __init init_inodecache(void)  {  	f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", -			sizeof(struct f2fs_inode_info), NULL); -	if (f2fs_inode_cachep == NULL) +			sizeof(struct f2fs_inode_info)); +	if (!f2fs_inode_cachep)  		return -ENOMEM;  	return 0;  } @@ -1019,9 +1163,12 @@ static int __init init_f2fs_fs(void)  	err = create_node_manager_caches();  	if (err)  		goto free_inodecache; -	err = create_gc_caches(); +	err = create_segment_manager_caches();  	if (err)  		goto free_node_manager_caches; +	err = create_gc_caches(); +	if (err) +		goto free_segment_manager_caches;  	err = create_checkpoint_caches();  	if (err)  		goto free_gc_caches; @@ -1043,6 +1190,8 @@ free_checkpoint_caches:  	destroy_checkpoint_caches();  free_gc_caches:  	destroy_gc_caches(); +free_segment_manager_caches: +	destroy_segment_manager_caches();  free_node_manager_caches:  	destroy_node_manager_caches();  free_inodecache: @@ -1058,6 +1207,7 @@ static void __exit exit_f2fs_fs(void)  	unregister_filesystem(&f2fs_fs_type);  	destroy_checkpoint_caches();  	destroy_gc_caches(); +	destroy_segment_manager_caches();  	destroy_node_manager_caches();  	destroy_inodecache();  	kset_unregister(f2fs_kset); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1ac8a5f6e38..8bea941ee30 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -21,11 +21,12 @@  #include <linux/rwsem.h>  #include <linux/f2fs_fs.h>  #include <linux/security.h> +#include <linux/posix_acl_xattr.h>  #include "f2fs.h"  #include "xattr.h"  static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, -		size_t list_size, const char *name, size_t name_len, int type) +		size_t list_size, const char *name, size_t len, int type)  {  	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);  	int total_len, prefix_len = 0; @@ -52,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,  		return -EINVAL;  	} -	total_len = prefix_len + name_len + 1; +	total_len = prefix_len + len + 1;  	if (list && total_len <= list_size) {  		memcpy(list, prefix, prefix_len); -		memcpy(list + prefix_len, name, name_len); -		list[prefix_len + name_len] = '\0'; +		memcpy(list + prefix_len, name, len); +		list[prefix_len + len] = '\0';  	}  	return total_len;  } @@ -107,11 +108,12 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,  	if (strcmp(name, "") == 0)  		return -EINVAL; -	return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); +	return f2fs_setxattr(dentry->d_inode, type, name, +					value, size, NULL, flags);  }  static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, -		size_t list_size, const char *name, size_t name_len, int type) +		size_t list_size, const char *name, size_t len, int type)  {  	const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;  	size_t size; @@ -163,7 +165,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,  	for (xattr = xattr_array; xattr->name != NULL; xattr++) {  		err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,  				xattr->name, xattr->value, -				xattr->value_len, (struct page *)page); +				xattr->value_len, (struct page *)page, 0);  		if (err < 0)  			break;  	} @@ -213,8 +215,8 @@ const struct xattr_handler f2fs_xattr_security_handler = {  static const struct xattr_handler *f2fs_xattr_handler_map[] = {  	[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,  #ifdef CONFIG_F2FS_FS_POSIX_ACL -	[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, -	[F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, +	[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, +	[F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,  #endif  	[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,  #ifdef CONFIG_F2FS_FS_SECURITY @@ -226,8 +228,8 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = {  const struct xattr_handler *f2fs_xattr_handlers[] = {  	&f2fs_xattr_user_handler,  #ifdef CONFIG_F2FS_FS_POSIX_ACL -	&f2fs_xattr_acl_access_handler, -	&f2fs_xattr_acl_default_handler, +	&posix_acl_access_xattr_handler, +	&posix_acl_default_xattr_handler,  #endif  	&f2fs_xattr_trusted_handler,  #ifdef CONFIG_F2FS_FS_SECURITY @@ -237,26 +239,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = {  	NULL,  }; -static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +static inline const struct xattr_handler *f2fs_xattr_handler(int index)  {  	const struct xattr_handler *handler = NULL; -	if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) -		handler = f2fs_xattr_handler_map[name_index]; +	if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map)) +		handler = f2fs_xattr_handler_map[index];  	return handler;  } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, -					size_t name_len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, +					size_t len, const char *name)  {  	struct f2fs_xattr_entry *entry;  	list_for_each_xattr(entry, base_addr) { -		if (entry->e_name_index != name_index) +		if (entry->e_name_index != index)  			continue; -		if (entry->e_name_len != name_len) +		if (entry->e_name_len != len)  			continue; -		if (!memcmp(entry->e_name, name, name_len)) +		if (!memcmp(entry->e_name, name, len))  			break;  	}  	return entry; @@ -271,7 +273,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)  	inline_size = inline_xattr_size(inode); -	txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); +	txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);  	if (!txattr_addr)  		return NULL; @@ -343,6 +345,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,  		if (ipage) {  			inline_addr = inline_xattr_addr(ipage); +			f2fs_wait_on_page_writeback(ipage, NODE);  		} else {  			page = get_node_page(sbi, inode->i_ino);  			if (IS_ERR(page)) { @@ -350,6 +353,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,  				return PTR_ERR(page);  			}  			inline_addr = inline_xattr_addr(page); +			f2fs_wait_on_page_writeback(page, NODE);  		}  		memcpy(inline_addr, txattr_addr, inline_size);  		f2fs_put_page(page, 1); @@ -369,7 +373,8 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,  			alloc_nid_failed(sbi, new_nid);  			return PTR_ERR(xpage);  		} -		BUG_ON(new_nid); +		f2fs_bug_on(new_nid); +		f2fs_wait_on_page_writeback(xpage, NODE);  	} else {  		struct dnode_of_data dn;  		set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -392,40 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,  	return 0;  } -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, +int f2fs_getxattr(struct inode *inode, int index, const char *name,  		void *buffer, size_t buffer_size)  {  	struct f2fs_xattr_entry *entry;  	void *base_addr;  	int error = 0; -	size_t value_len, name_len; +	size_t size, len;  	if (name == NULL)  		return -EINVAL; -	name_len = strlen(name); + +	len = strlen(name); +	if (len > F2FS_NAME_LEN) +		return -ERANGE;  	base_addr = read_all_xattrs(inode, NULL);  	if (!base_addr)  		return -ENOMEM; -	entry = __find_xattr(base_addr, name_index, name_len, name); +	entry = __find_xattr(base_addr, index, len, name);  	if (IS_XATTR_LAST_ENTRY(entry)) {  		error = -ENODATA;  		goto cleanup;  	} -	value_len = le16_to_cpu(entry->e_value_size); +	size = le16_to_cpu(entry->e_value_size); -	if (buffer && value_len > buffer_size) { +	if (buffer && size > buffer_size) {  		error = -ERANGE;  		goto cleanup;  	}  	if (buffer) {  		char *pval = entry->e_name + entry->e_name_len; -		memcpy(buffer, pval, value_len); +		memcpy(buffer, pval, size);  	} -	error = value_len; +	error = size;  cleanup:  	kzfree(base_addr); @@ -469,16 +477,15 @@ cleanup:  	return error;  } -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, -			const void *value, size_t value_len, struct page *ipage) +static int __f2fs_setxattr(struct inode *inode, int index, +			const char *name, const void *value, size_t size, +			struct page *ipage, int flags)  { -	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);  	struct f2fs_inode_info *fi = F2FS_I(inode);  	struct f2fs_xattr_entry *here, *last;  	void *base_addr;  	int found, newsize; -	size_t name_len; -	int ilock; +	size_t len;  	__u32 new_hsize;  	int error = -ENOMEM; @@ -486,32 +493,35 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,  		return -EINVAL;  	if (value == NULL) -		value_len = 0; +		size = 0; -	name_len = strlen(name); +	len = strlen(name); -	if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) +	if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))  		return -ERANGE; -	f2fs_balance_fs(sbi); - -	ilock = mutex_lock_op(sbi); -  	base_addr = read_all_xattrs(inode, ipage);  	if (!base_addr)  		goto exit;  	/* find entry with wanted name. */ -	here = __find_xattr(base_addr, name_index, name_len, name); +	here = __find_xattr(base_addr, index, len, name);  	found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; -	last = here; +	if ((flags & XATTR_REPLACE) && !found) { +		error = -ENODATA; +		goto exit; +	} else if ((flags & XATTR_CREATE) && found) { +		error = -EEXIST; +		goto exit; +	} + +	last = here;  	while (!IS_XATTR_LAST_ENTRY(last))  		last = XATTR_NEXT_ENTRY(last); -	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + -			name_len + value_len); +	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);  	/* 1. Check space */  	if (value) { @@ -522,9 +532,9 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,  		 */  		free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);  		if (found) -			free = free - ENTRY_SIZE(here); +			free = free + ENTRY_SIZE(here); -		if (free < newsize) { +		if (unlikely(free < newsize)) {  			error = -ENOSPC;  			goto exit;  		} @@ -554,12 +564,12 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,  		 * We just write new entry.  		 */  		memset(last, 0, newsize); -		last->e_name_index = name_index; -		last->e_name_len = name_len; -		memcpy(last->e_name, name, name_len); -		pval = last->e_name + name_len; -		memcpy(pval, value, value_len); -		last->e_value_size = cpu_to_le16(value_len); +		last->e_name_index = index; +		last->e_name_len = len; +		memcpy(last->e_name, name, len); +		pval = last->e_name + len; +		memcpy(pval, value, size); +		last->e_value_size = cpu_to_le16(size);  		new_hsize += newsize;  	} @@ -578,7 +588,29 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,  	else  		update_inode_page(inode);  exit: -	mutex_unlock_op(sbi, ilock);  	kzfree(base_addr);  	return error;  } + +int f2fs_setxattr(struct inode *inode, int index, const char *name, +				const void *value, size_t size, +				struct page *ipage, int flags) +{ +	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +	int err; + +	/* this case is only from init_inode_metadata */ +	if (ipage) +		return __f2fs_setxattr(inode, index, name, value, +						size, ipage, flags); +	f2fs_balance_fs(sbi); + +	f2fs_lock_op(sbi); +	/* protect xattr_ver */ +	down_write(&F2FS_I(inode)->i_sem); +	err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); +	up_write(&F2FS_I(inode)->i_sem); +	f2fs_unlock_op(sbi); + +	return err; +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 02a08fb88a1..34ab7dbcf5e 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -108,26 +108,24 @@ struct f2fs_xattr_entry {  #ifdef CONFIG_F2FS_FS_XATTR  extern const struct xattr_handler f2fs_xattr_user_handler;  extern const struct xattr_handler f2fs_xattr_trusted_handler; -extern const struct xattr_handler f2fs_xattr_acl_access_handler; -extern const struct xattr_handler f2fs_xattr_acl_default_handler;  extern const struct xattr_handler f2fs_xattr_advise_handler;  extern const struct xattr_handler f2fs_xattr_security_handler;  extern const struct xattr_handler *f2fs_xattr_handlers[];  extern int f2fs_setxattr(struct inode *, int, const char *, -				const void *, size_t, struct page *); +				const void *, size_t, struct page *, int);  extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);  extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);  #else  #define f2fs_xattr_handlers	NULL -static inline int f2fs_setxattr(struct inode *inode, int name_index, -		const char *name, const void *value, size_t value_len) +static inline int f2fs_setxattr(struct inode *inode, int index, +		const char *name, const void *value, size_t size, int flags)  {  	return -EOPNOTSUPP;  } -static inline int f2fs_getxattr(struct inode *inode, int name_index, +static inline int f2fs_getxattr(struct inode *inode, int index,  		const char *name, void *buffer, size_t buffer_size)  {  	return -EOPNOTSUPP;  | 
