diff options
Diffstat (limited to 'fs/nilfs2')
35 files changed, 2999 insertions, 1440 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 251da07b2a1..80da8eb2739 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig @@ -1,6 +1,5 @@ config NILFS2_FS - tristate "NILFS2 file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "NILFS2 file system support" select CRC32 help NILFS2 is a log-structured file system (LFS) supporting continuous diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index d7fd696e595..741fd02e044 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, } /** + * nilfs_palloc_count_desc_blocks - count descriptor blocks number + * @inode: inode of metadata file using this allocator + * @desc_blocks: descriptor blocks number [out] + */ +static int nilfs_palloc_count_desc_blocks(struct inode *inode, + unsigned long *desc_blocks) +{ + unsigned long blknum; + int ret; + + ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); + if (likely(!ret)) + *desc_blocks = DIV_ROUND_UP( + blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); + return ret; +} + +/** + * nilfs_palloc_mdt_file_can_grow - check potential opportunity for + * MDT file growing + * @inode: inode of metadata file using this allocator + * @desc_blocks: known current descriptor blocks count + */ +static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, + unsigned long desc_blocks) +{ + return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < + nilfs_palloc_groups_count(inode); +} + +/** + * nilfs_palloc_count_max_entries - count max number of entries that can be + * described by descriptor blocks count + * @inode: inode of metadata file using this allocator + * @nused: current number of used entries + * @nmaxp: max number of entries [out] + */ +int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) +{ + unsigned long desc_blocks = 0; + u64 entries_per_desc_block, nmax; + int err; + + err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); + if (unlikely(err)) + return err; + + entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * + nilfs_palloc_groups_per_desc_block(inode); + nmax = entries_per_desc_block * desc_blocks; + + if (nused == nmax && + nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) + nmax += entries_per_desc_block; + + if (nused > nmax) + return -ERANGE; + + *nmaxp = nmax; + return 0; +} + +/** * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation @@ -489,8 +552,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, void nilfs_palloc_commit_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); @@ -521,14 +584,14 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, group_offset, bitmap)) printk(KERN_WARNING "%s: entry number %llu already freed\n", __func__, (unsigned long long)req->pr_entry_nr); - - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + else + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); @@ -558,8 +621,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, group_offset, bitmap)) printk(KERN_WARNING "%s: entry number %llu already freed\n", __func__, (unsigned long long)req->pr_entry_nr); - - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + else + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -646,7 +709,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned long group, group_offset; int i, j, n, ret; - for (i = 0; i < nitems; i += n) { + for (i = 0; i < nitems; i = j) { group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) @@ -665,7 +728,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) for (j = i, n = 0; (j < nitems) && nilfs_palloc_group_is_in(inode, group, entry_nrs[j]); - j++, n++) { + j++) { nilfs_palloc_group(inode, entry_nrs[j], &group_offset); if (!nilfs_clear_bit_atomic( nilfs_mdt_bgl_lock(inode, group), @@ -674,6 +737,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) "%s: entry number %llu already freed\n", __func__, (unsigned long long)entry_nrs[j]); + } else { + n++; } } nilfs_palloc_group_desc_add_entries(inode, group, desc, n); @@ -681,8 +746,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) kunmap(bitmap_bh->b_page); kunmap(desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(desc_bh); - nilfs_mdt_mark_buffer_dirty(bitmap_bh); + mark_buffer_dirty(desc_bh); + mark_buffer_dirty(bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(bitmap_bh); diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 9af34a7e6e1..4bd6451b570 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, void *nilfs_palloc_block_get_entry(const struct inode *, __u64, const struct buffer_head *, void *); +int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); + /** * nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) @@ -74,17 +76,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic -#define nilfs_find_next_zero_bit ext2_find_next_zero_bit +#define nilfs_find_next_zero_bit find_next_zero_bit_le -/* - * persistent object allocator cache +/** + * struct nilfs_bh_assoc - block offset and buffer head association + * @blkoff: block offset + * @bh: buffer head */ - struct nilfs_bh_assoc { unsigned long blkoff; struct buffer_head *bh; }; +/** + * struct nilfs_palloc_cache - persistent object allocator cache + * @lock: cache protecting lock + * @prev_desc: blockgroup descriptors cache + * @prev_bitmap: blockgroup bitmap cache + * @prev_entry: translation entries cache + */ struct nilfs_palloc_cache { spinlock_t lock; struct nilfs_bh_assoc prev_desc; diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 8b782b062ba..aadbd0b5e3e 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -25,7 +25,6 @@ #include <linux/errno.h> #include "nilfs.h" #include "bmap.h" -#include "sb.h" #include "btree.h" #include "direct.h" #include "btnode.h" @@ -35,7 +34,22 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { - return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); + struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info; + + return nilfs->ns_dat; +} + +static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, + const char *fname, int err) +{ + struct inode *inode = bmap->b_inode; + + if (err == -EINVAL) { + nilfs_error(inode->i_sb, fname, + "broken bmap (inode number=%lu)\n", inode->i_ino); + err = -EIO; + } + return err; } /** @@ -66,8 +80,10 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); - if (ret < 0) + if (ret < 0) { + ret = nilfs_bmap_convert_error(bmap, __func__, ret); goto out; + } if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); @@ -88,7 +104,8 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); up_read(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -144,7 +161,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = nilfs_bmap_do_insert(bmap, key, rec); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) @@ -180,9 +198,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) down_read(&bmap->b_sem); ret = bmap->b_ops->bop_last_key(bmap, &lastkey); - if (!ret) - *key = lastkey; up_read(&bmap->b_sem); + + if (ret < 0) + ret = nilfs_bmap_convert_error(bmap, __func__, ret); + else + *key = lastkey; return ret; } @@ -210,7 +231,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_delete(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) @@ -261,7 +283,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_truncate(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -300,7 +323,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -344,7 +368,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -373,7 +398,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -400,17 +426,6 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) /* * Internal use only */ - -void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) -{ - inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); -} - -void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) -{ - inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); -} - __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index bde1c0aa2e1..b89e68076ad 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -135,6 +135,13 @@ struct nilfs_bmap { /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +/** + * struct nilfs_bmap_store - shadow copy of bmap state + * @data: cached raw block mapping of on-disk inode + * @last_allocated_key: cached value of last allocated key for data block + * @last_allocated_ptr: cached value of last allocated ptr for data block + * @state: cached value of state field of bmap structure + */ struct nilfs_bmap_store { __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; __u64 last_allocated_key; @@ -240,9 +247,6 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); -void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); -void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); - /* Assume that bmap semaphore is locked. */ static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 5115814cb74..a35ae35e693 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -34,22 +34,6 @@ #include "page.h" #include "btnode.h" - -void nilfs_btnode_cache_init_once(struct address_space *btnc) -{ - nilfs_mapping_init_once(btnc); -} - -static const struct address_space_operations def_btnode_aops = { - .sync_page = block_sync_page, -}; - -void nilfs_btnode_cache_init(struct address_space *btnc, - struct backing_dev_info *bdi) -{ - nilfs_mapping_init(btnc, bdi, &def_btnode_aops); -} - void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -72,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) BUG(); } memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); @@ -104,11 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, if (pblocknr == 0) { pblocknr = blocknr; if (inode->i_ino != NILFS_DAT_INO) { - struct inode *dat = - nilfs_dat_inode(NILFS_I_NILFS(inode)); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; /* blocknr is a virtual block number */ - err = nilfs_dat_translate(dat, blocknr, &pblocknr); + err = nilfs_dat_translate(nilfs->ns_dat, blocknr, + &pblocknr); if (unlikely(err)) { brelse(bh); goto out_locked; @@ -131,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -270,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); - nilfs_btnode_mark_dirty(obh); + mark_buffer_dirty(obh); spin_lock_irq(&btnc->tree_lock); radix_tree_delete(&btnc->page_tree, oldkey); @@ -282,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, unlock_page(opage); } else { nilfs_copy_buffer(nbh, obh); - nilfs_btnode_mark_dirty(nbh); + mark_buffer_dirty(nbh); nbh->b_blocknr = newkey; ctxt->bh = nbh; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 79037494f1e..d876b565ce6 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -29,7 +29,13 @@ #include <linux/fs.h> #include <linux/backing-dev.h> - +/** + * struct nilfs_btnode_chkey_ctxt - change key context + * @oldkey: old key of block's moving content + * @newkey: new key for block's content + * @bh: buffer head of old buffer + * @newbh: buffer head of new buffer + */ struct nilfs_btnode_chkey_ctxt { __u64 oldkey; __u64 newkey; @@ -37,8 +43,6 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; -void nilfs_btnode_cache_init_once(struct address_space *); -void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); @@ -52,7 +56,4 @@ void nilfs_btnode_commit_change_key(struct address_space *, void nilfs_btnode_abort_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); -#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) - - #endif /* _NILFS_BTNODE_H */ diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 300c2bc00c3..b2e3ff34762 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree, nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } @@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree, nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, @@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree, nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree, nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, @@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); newkey = nilfs_btree_node_get_key(right, 0); newptr = path[level].bp_newreq.bpr_ptr; @@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree, nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1174,7 +1174,7 @@ static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) if (ret < 0) goto out; nilfs_btree_commit_insert(btree, path, level, key, ptr); - nilfs_bmap_add_blocks(btree, stats.bs_nblocks); + nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); @@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree, nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, @@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree, nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; @@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree, nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; @@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree, path[level].bp_bh = NULL; } +static void nilfs_btree_nop(struct nilfs_bmap *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ +} static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, @@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; - int pindex, level, ncmin, ncmax, ncblk, ret; + int pindex, dindex, level, ncmin, ncmax, ncblk, ret; ret = 0; stats->bs_nblocks = 0; ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); ncblk = nilfs_btree_nchildren_per_block(btree); - for (level = NILFS_BTREE_LEVEL_NODE_MIN; + for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(node, path[level].bp_index, - ncblk); + nilfs_btree_node_get_ptr(node, dindex, ncblk); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) @@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; + dindex = pindex; if (pindex > 0) { /* left sibling */ @@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_right; stats->bs_nblocks++; + /* + * When merging right sibling node + * into the current node, pointer to + * the right sibling node must be + * terminated instead. The adjustment + * below is required for that. + */ + dindex = pindex + 1; /* continue; */ } } else { @@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; stats->bs_nblocks += 2; + level++; + path[level].bp_op = nilfs_btree_nop; + goto shrink_root_child; } else { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; + goto out; } - - goto out; - } } + /* child of the root node is deleted */ + path[level].bp_op = nilfs_btree_do_delete; + stats->bs_nblocks++; + +shrink_root_child: node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(node, path[level].bp_index, + nilfs_btree_node_get_ptr(node, dindex, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; - /* child of the root node is deleted */ - path[level].bp_op = nilfs_btree_do_delete; - stats->bs_nblocks++; - /* success */ out: *levelp = level; @@ -1511,7 +1526,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) goto out; nilfs_btree_commit_delete(btree, path, level, dat); - nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); + nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); @@ -1709,7 +1724,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); + mark_buffer_dirty(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); @@ -1776,7 +1791,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, return ret; nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, di, ni, bh); - nilfs_bmap_add_blocks(btree, stats.bs_nblocks); + nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); return 0; } @@ -1787,7 +1802,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, { while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); return 0; } @@ -2229,7 +2244,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) } if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); + mark_buffer_dirty(bh); brelse(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 5ff15a8a102..0d58075f34e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, kaddr, 1); - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + kunmap_atomic(kaddr); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); } @@ -286,7 +286,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 cno; void *kaddr; unsigned long tnicps; - int ret, ncps, nicps, count, i; + int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { printk(KERN_ERR "%s: invalid range of checkpoint numbers: " @@ -301,6 +301,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, if (ret < 0) goto out_sem; tnicps = 0; + nss = 0; for (cno = start; cno < end; cno += ncps) { ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); @@ -313,20 +314,21 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint( cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - WARN_ON(nilfs_checkpoint_snapshot(cp)); - if (!nilfs_checkpoint_invalid(cp)) { + if (nilfs_checkpoint_snapshot(cp)) { + nss++; + } else if (!nilfs_checkpoint_invalid(cp)) { nilfs_checkpoint_set_invalid(cp); nicps++; } } if (nicps > 0) { tnicps += nicps; - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); nilfs_mdt_mark_dirty(cpfile); if (!nilfs_cpfile_is_in_first(cpfile, cno)) { count = @@ -334,7 +336,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cp_bh, kaddr, nicps); if (count == 0) { /* make hole */ - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(cp_bh); ret = nilfs_cpfile_delete_checkpoint_block( @@ -349,21 +351,23 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(cp_bh); } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } brelse(header_bh); + if (nss > 0) + ret = -EBUSY; out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); @@ -408,7 +412,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, continue; /* skip hole */ } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { @@ -418,7 +422,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, n++; } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); } @@ -451,10 +455,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); if (curr == 0) { ret = 0; @@ -472,7 +476,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); while (n < nci) { cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ @@ -488,7 +492,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -496,12 +500,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); } curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); *cnop = curr; ret = n; @@ -592,24 +596,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) goto out_cp; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); list = &header->ch_snapshot_list; curr_bh = header_bh; @@ -621,13 +625,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); } curr_blkoff = prev_blkoff; cp = nilfs_cpfile_block_get_checkpoint( @@ -635,7 +639,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, @@ -647,34 +651,34 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, curr, curr_bh, kaddr); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(curr_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(curr_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); @@ -710,23 +714,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) @@ -750,34 +754,34 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(next_bh->b_page, KM_USER0); + kaddr = kmap_atomic(next_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, next, next_bh, kaddr); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(next_bh); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(next_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); @@ -829,13 +833,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); out: @@ -912,12 +916,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); out_sem: @@ -938,6 +942,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct inode *cpfile; int err; + if (cpsize > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large checkpoint size: %zu bytes.\n", + cpsize); + return -EINVAL; + } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { + printk(KERN_ERR + "NILFS: too small checkpoint size: %zu bytes.\n", + cpsize); + return -EINVAL; + } + cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); if (unlikely(!cpfile)) return -ENOMEM; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 49c844dab33..0d5fada9119 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,12 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +/** + * struct nilfs_dat_info - on-memory private data of DAT file + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of DAT file + * @shadow: shadow map of DAT file + */ struct nilfs_dat_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; @@ -54,7 +60,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat, static void nilfs_dat_commit_entry(struct inode *dat, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); + mark_buffer_dirty(req->pr_entry_bh); nilfs_mdt_mark_dirty(dat); brelse(req->pr_entry_bh); } @@ -85,13 +91,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -109,13 +115,13 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); nilfs_palloc_commit_free_entry(dat, req); @@ -136,12 +142,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); } @@ -160,12 +166,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -186,7 +192,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); end = start = le64_to_cpu(entry->de_start); @@ -196,7 +202,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -211,12 +217,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -335,7 +341,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * the device at this point. * * To prevent nilfs_dat_translate() from returning the - * uncommited block number, this makes a copy of the entry + * uncommitted block number, this makes a copy of the entry * buffer and redirects nilfs_dat_translate() to the copy. */ if (!buffer_nilfs_redirected(entry_bh)) { @@ -346,22 +352,22 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(entry_bh); + mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); brelse(entry_bh); @@ -409,7 +415,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { @@ -419,7 +425,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return ret; } @@ -440,7 +446,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); /* last virtual block number in this block */ first = vinfo->vi_vblocknr; do_div(first, entries_per_block); @@ -456,7 +462,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); } @@ -478,6 +484,18 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_dat_info *di; int err; + if (entry_size > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large DAT entry size: %zu bytes.\n", + entry_size); + return -EINVAL; + } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { + printk(KERN_ERR + "NILFS: too small DAT entry size: %zu bytes.\n", + entry_size); + return -EINVAL; + } + dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); if (unlikely(!dat)) return -ENOMEM; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index cb003c8ee1f..197a63e9d10 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -91,7 +91,6 @@ static void nilfs_commit_chunk(struct page *page, unsigned from, unsigned to) { struct inode *dir = mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); loff_t pos = page_offset(page) + from; unsigned len = to - from; unsigned nr_dirty, copied; @@ -103,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page, i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + err = nilfs_set_file_dirty(dir, nr_dirty); WARN_ON(err); /* do not happen */ unlock_page(page); } @@ -252,27 +251,23 @@ nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } -static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int nilfs_readdir(struct file *file, struct dir_context *ctx) { - loff_t pos = filp->f_pos; - struct inode *inode = filp->f_dentry->d_inode; + loff_t pos = ctx->pos; + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ - unsigned char *types = NULL; - int ret; if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) - goto success; - - types = nilfs_filetype_table; + return 0; for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; @@ -282,9 +277,8 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (IS_ERR(page)) { nilfs_error(sb, __func__, "bad page in #%lu", inode->i_ino); - filp->f_pos += PAGE_CACHE_SIZE - offset; - ret = -EIO; - goto done; + ctx->pos += PAGE_CACHE_SIZE - offset; + return -EIO; } kaddr = page_address(page); de = (struct nilfs_dir_entry *)(kaddr + offset); @@ -294,35 +288,28 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (de->rec_len == 0) { nilfs_error(sb, __func__, "zero-length directory entry"); - ret = -EIO; nilfs_put_page(page); - goto done; + return -EIO; } if (de->inode) { - int over; - unsigned char d_type = DT_UNKNOWN; + unsigned char t; - if (types && de->file_type < NILFS_FT_MAX) - d_type = types[de->file_type]; + if (de->file_type < NILFS_FT_MAX) + t = nilfs_filetype_table[de->file_type]; + else + t = DT_UNKNOWN; - offset = (char *)de - kaddr; - over = filldir(dirent, de->name, de->name_len, - (n<<PAGE_CACHE_SHIFT) | offset, - le64_to_cpu(de->inode), d_type); - if (over) { + if (!dir_emit(ctx, de->name, de->name_len, + le64_to_cpu(de->inode), t)) { nilfs_put_page(page); - goto success; + return 0; } } - filp->f_pos += nilfs_rec_len_from_disk(de->rec_len); + ctx->pos += nilfs_rec_len_from_disk(de->rec_len); } nilfs_put_page(page); } - -success: - ret = 0; -done: - return ret; + return 0; } /* @@ -441,7 +428,6 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, nilfs_commit_chunk(page, mapping, from, to); nilfs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; -/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ } /* @@ -532,7 +518,6 @@ got_it: nilfs_set_de_type(de, inode); nilfs_commit_chunk(page, page->mapping, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME; -/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: @@ -580,7 +565,6 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) dir->inode = 0; nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; -/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ out: nilfs_put_page(page); return err; @@ -606,7 +590,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) unlock_page(page); goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; @@ -621,7 +605,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: page_cache_release(page); @@ -682,10 +666,10 @@ not_empty: const struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = nilfs_readdir, + .iterate = nilfs_readdir, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_ioctl, + .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ .fsync = nilfs_sync_file, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 324d80c5751..82f4865e86d 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -146,7 +146,7 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (NILFS_BMAP_USE_VBN(bmap)) nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); - nilfs_bmap_add_blocks(bmap, 1); + nilfs_inode_add_blocks(bmap->b_inode, 1); } return ret; } @@ -168,7 +168,7 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) if (!ret) { nilfs_bmap_commit_end_ptr(bmap, &req, dat); nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); - nilfs_bmap_sub_blocks(bmap, 1); + nilfs_inode_sub_blocks(bmap->b_inode, 1); } return ret; } diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h index a71cc412b65..19ccbf9522a 100644 --- a/fs/nilfs2/export.h +++ b/fs/nilfs2/export.h @@ -5,6 +5,14 @@ extern const struct export_operations nilfs_export_ops; +/** + * struct nilfs_fid - NILFS file id type + * @cno: checkpoint number + * @ino: inode number + * @gen: file generation (version) for NFS + * @parent_gen: parent generation (version) for NFS + * @parent_ino: parent inode number + */ struct nilfs_fid { u64 cno; u64 ino; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index c9a30d7ff6f..24978153c0c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -27,7 +27,7 @@ #include "nilfs.h" #include "segment.h" -int nilfs_sync_file(struct file *file, int datasync) +int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { /* * Called from fsync() system call @@ -37,45 +37,58 @@ int nilfs_sync_file(struct file *file, int datasync) * This function should be implemented when the writeback function * will be implemented. */ + struct the_nilfs *nilfs; struct inode *inode = file->f_mapping->host; int err; - if (!nilfs_inode_dirty(inode)) - return 0; - - if (datasync) - err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, - LLONG_MAX); - else - err = nilfs_construct_segment(inode->i_sb); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + if (nilfs_inode_dirty(inode)) { + if (datasync) + err = nilfs_construct_dsync_segment(inode->i_sb, inode, + 0, LLONG_MAX); + else + err = nilfs_construct_segment(inode->i_sb); + } + mutex_unlock(&inode->i_mutex); + nilfs = inode->i_sb->s_fs_info; + if (!err && nilfs_test_opt(nilfs, BARRIER)) { + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (err != -EIO) + err = 0; + } return err; } static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; - int ret; + int ret = 0; - if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) + if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ + sb_start_pagefault(inode->i_sb); lock_page(page); if (page->mapping != inode->i_mapping || page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); - return VM_FAULT_NOPAGE; /* make the VM retry the fault */ + ret = -EFAULT; /* make the VM retry the fault */ + goto out; } /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) { - unlock_page(page); + if (PageMappedToDisk(page)) goto mapped; - } + if (page_has_buffers(page)) { struct buffer_head *bh, *head; int fully_mapped = 1; @@ -90,7 +103,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (fully_mapped) { SetPageMappedToDisk(page); - unlock_page(page); goto mapped; } } @@ -102,31 +114,35 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); /* never returns -ENOMEM, but may return -ENOSPC */ if (unlikely(ret)) - return VM_FAULT_SIGBUS; + goto out; - ret = block_page_mkwrite(vma, vmf, nilfs_get_block); - if (unlikely(ret)) { + file_update_time(vma->vm_file); + ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + if (ret) { nilfs_transaction_abort(inode->i_sb); - return ret; + goto out; } + nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); nilfs_transaction_commit(inode->i_sb); mapped: - SetPageChecked(page); - wait_on_page_writeback(page); - return 0; + wait_for_stable_page(page); + out: + sb_end_pagefault(inode->i_sb); + return block_page_mkwrite_return(ret); } static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nilfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &nilfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } @@ -136,13 +152,13 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) */ const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_ioctl, + .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ .mmap = nilfs_file_mmap, .open = generic_file_open, @@ -152,9 +168,9 @@ const struct file_operations nilfs_file_operations = { }; const struct inode_operations nilfs_file_inode_operations = { - .truncate = nilfs_truncate, .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; /* end of file */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 33ad25ddd5c..57ceaf33d17 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -48,10 +48,6 @@ #include "dat.h" #include "ifile.h" -static const struct address_space_operations def_gcinode_aops = { - .sync_page = block_sync_page, -}; - /* * nilfs_gccache_submit_read_data() - add data buffer and submit read request * @inode - gc inode @@ -88,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; if (pbn == 0) { - struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; - /* use original dat, not gc dat. */ - err = nilfs_dat_translate(dat_inode, vbn, &pbn); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ brelse(bh); goto failed; @@ -104,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, } if (!buffer_mapped(bh)) { - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; set_buffer_mapped(bh); } bh->b_blocknr = pbn; @@ -161,39 +157,26 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (buffer_dirty(bh)) return -EEXIST; - if (buffer_nilfs_node(bh)) { - if (nilfs_btree_broken_node_block(bh)) { - clear_buffer_uptodate(bh); - return -EIO; - } - nilfs_btnode_mark_dirty(bh); - } else { - nilfs_mark_buffer_dirty(bh); + if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + return -EIO; } + mark_buffer_dirty(bh); return 0; } int nilfs_init_gcinode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - inode->i_mapping->a_ops = &def_gcinode_aops; + inode->i_mapping->a_ops = &empty_aops; inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - /* - * Add the inode to GC inode list. Garbage Collection - * is serialized and no two processes manipulate the - * list simultaneously. - */ - igrab(inode); - list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - return 0; } @@ -208,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) while (!list_empty(head)) { ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 9f8a2da67f9..6548c7851b4 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -29,7 +29,11 @@ #include "alloc.h" #include "ifile.h" - +/** + * struct nilfs_ifile_info - on-memory private data of ifile + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of ifile + */ struct nilfs_ifile_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; @@ -80,7 +84,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, return ret; } nilfs_palloc_commit_alloc_entry(ifile, &req); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); nilfs_mdt_mark_dirty(ifile); *out_ino = (ino_t)req.pr_entry_nr; *out_bh = req.pr_entry_bh; @@ -122,13 +126,13 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req.pr_entry_bh->b_page); raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, req.pr_entry_bh, kaddr); raw_inode->i_flags = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); nilfs_palloc_commit_free_entry(ifile, &req); @@ -149,14 +153,31 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); - if (unlikely(err)) { - if (err == -EINVAL) - nilfs_error(sb, __func__, "ifile is broken"); - else - nilfs_warning(sb, __func__, - "unable to read inode: %lu", - (unsigned long) ino); - } + if (unlikely(err)) + nilfs_warning(sb, __func__, "unable to read inode: %lu", + (unsigned long) ino); + return err; +} + +/** + * nilfs_ifile_count_free_inodes - calculate free inodes count + * @ifile: ifile inode + * @nmaxinodes: current maximum of available inodes count [out] + * @nfreeinodes: free inodes count [out] + */ +int nilfs_ifile_count_free_inodes(struct inode *ifile, + u64 *nmaxinodes, u64 *nfreeinodes) +{ + u64 nused; + int err; + + *nmaxinodes = 0; + *nfreeinodes = 0; + + nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); + err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); + if (likely(!err)) + *nfreeinodes = *nmaxinodes - nused; return err; } diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 59b6f2b51df..679674d1337 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); + int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, size_t inode_size, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 71d4bc8464e..6252b173a46 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -25,7 +25,7 @@ #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/writeback.h> -#include <linux/uio.h> +#include <linux/aio.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -34,6 +34,13 @@ #include "cpfile.h" #include "ifile.h" +/** + * struct nilfs_iget_args - arguments used during comparison between inodes + * @ino: inode number + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag + */ struct nilfs_iget_args { u64 ino; __u64 cno; @@ -41,6 +48,24 @@ struct nilfs_iget_args { int for_gc; }; +void nilfs_inode_add_blocks(struct inode *inode, int n) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + inode_add_bytes(inode, (1 << inode->i_blkbits) * n); + if (root) + atomic64_add(n, &root->blocks_count); +} + +void nilfs_inode_sub_blocks(struct inode *inode, int n) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); + if (root) + atomic64_sub(n, &root->blocks_count); +} + /** * nilfs_get_block() - get a file block on the filesystem (callback function) * @inode - inode struct of the target file @@ -56,14 +81,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 blknum = 0; int err = 0, ret; - struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; - down_read(&NILFS_MDT(dat)->mi_sem); + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); - up_read(&NILFS_MDT(dat)->mi_sem); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); if (ret > 0) @@ -96,11 +121,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, inode->i_ino, (unsigned long long)blkoff); err = 0; - } else if (err == -EINVAL) { - nilfs_error(inode->i_sb, __func__, - "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; } nilfs_transaction_abort(inode->i_sb); goto out; @@ -109,6 +129,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); + set_buffer_delay(bh_result); map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed to proper value */ } else if (ret == -ENOENT) { @@ -154,6 +175,11 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { + nilfs_clear_dirty_pages(mapping, false); + return -EROFS; + } + if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_dsync_segment(inode->i_sb, inode, wbc->range_start, @@ -166,6 +192,18 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; + if (inode->i_sb->s_flags & MS_RDONLY) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); @@ -181,18 +219,46 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { - int ret = __set_page_dirty_buffers(page); + int ret = __set_page_dirty_nobuffers(page); - if (ret) { + if (page_has_buffers(page)) { struct inode *inode = page->mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); - unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); - - nilfs_set_file_dirty(sbi, inode, nr_dirty); + unsigned nr_dirty = 0; + struct buffer_head *bh, *head; + + /* + * This page is locked by callers, and no other thread + * concurrently marks its buffers dirty since they are + * only dirtied through routines in fs/buffer.c in + * which call sites of mark_buffer_dirty are protected + * by page lock. + */ + bh = head = page_buffers(page); + do { + /* Do not mark hole blocks dirty */ + if (buffer_dirty(bh) || !buffer_mapped(bh)) + continue; + + set_buffer_dirty(bh); + nr_dirty++; + } while (bh = bh->b_this_page, bh != head); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } +void nilfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + nilfs_truncate(inode); + } +} + static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -207,10 +273,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, err = block_write_begin(mapping, pos, len, flags, pagep, nilfs_get_block); if (unlikely(err)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - + nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); } return err; @@ -229,25 +292,27 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, start + copied); copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); + nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); return err ? : copied; } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t size; if (rw == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, nilfs_get_block, NULL); + size = blockdev_direct_IO(rw, iocb, inode, iter, offset, + nilfs_get_block); /* * In case of error extending write may have instantiated a few @@ -255,10 +320,10 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ if (unlikely((rw & WRITE) && size < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) - vmtruncate(inode, isize); + nilfs_write_failed(mapping, end); } return size; @@ -267,7 +332,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, const struct address_space_operations nilfs_aops = { .writepage = nilfs_writepage, .readpage = nilfs_readpage, - .sync_page = block_sync_page, .writepages = nilfs_writepages, .set_page_dirty = nilfs_set_page_dirty, .readpages = nilfs_readpages, @@ -279,10 +343,10 @@ const struct address_space_operations nilfs_aops = { .is_partially_uptodate = block_is_partially_uptodate, }; -struct inode *nilfs_new_inode(struct inode *dir, int mode) +struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -306,7 +370,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&root->inodes_count); + atomic64_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -320,19 +384,16 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* No lock is needed; iget() ensures it. */ } - ii->i_flags = NILFS_I(dir)->i_flags; - if (S_ISLNK(mode)) - ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL); - if (!S_ISDIR(mode)) - ii->i_flags &= ~NILFS_DIRSYNC_FL; + ii->i_flags = nilfs_mask_flags( + mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); + spin_lock(&nilfs->ns_next_gen_lock); + inode->i_generation = nilfs->ns_next_generation++; + spin_unlock(&nilfs->ns_next_gen_lock); insert_inode_hash(inode); err = nilfs_init_acl(inode, dir); @@ -345,7 +406,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) failed_acl: failed_bmap: - inode->i_nlink = 0; + clear_nlink(inode); iput(inode); /* raw_inode will be deleted through generic_delete_inode() */ goto failed; @@ -364,17 +425,15 @@ void nilfs_set_inode_flags(struct inode *inode) inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC); - if (flags & NILFS_SYNC_FL) + if (flags & FS_SYNC_FL) inode->i_flags |= S_SYNC; - if (flags & NILFS_APPEND_FL) + if (flags & FS_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & NILFS_IMMUTABLE_FL) + if (flags & FS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; -#ifndef NILFS_ATIME_DISABLE - if (flags & NILFS_NOATIME_FL) -#endif + if (flags & FS_NOATIME_FL) inode->i_flags |= S_NOATIME; - if (flags & NILFS_DIRSYNC_FL) + if (flags & FS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); @@ -387,9 +446,9 @@ int nilfs_read_inode_common(struct inode *inode, int err; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); - inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); + i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); @@ -425,13 +484,12 @@ static int __nilfs_read_inode(struct super_block *sb, struct nilfs_root *root, unsigned long ino, struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); + struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *bh; struct nilfs_inode *raw_inode; int err; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; @@ -461,7 +519,7 @@ static int __nilfs_read_inode(struct super_block *sb, } nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); return 0; @@ -470,7 +528,7 @@ static int __nilfs_read_inode(struct super_block *sb, brelse(bh); bad_inode: - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); return err; } @@ -577,8 +635,8 @@ void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode_info *ii = NILFS_I(inode); raw_inode->i_mode = cpu_to_le16(inode->i_mode); - raw_inode->i_uid = cpu_to_le32(inode->i_uid); - raw_inode->i_gid = cpu_to_le32(inode->i_gid); + raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); + raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -590,6 +648,16 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + /* zero-fill unused portion in the case of super root block */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + } + if (has_bmap) nilfs_bmap_write(ii->i_bmap, raw_inode); else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -629,7 +697,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; - repeat: +repeat: ret = nilfs_bmap_last_key(ii->i_bmap, &b); if (ret == -ENOENT) return; @@ -646,14 +714,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, nilfs_bmap_truncate(ii->i_bmap, b) == 0)) goto repeat; - failed: - if (ret == -EINVAL) - nilfs_error(ii->vfs_inode.i_sb, __func__, - "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); - else - nilfs_warning(ii->vfs_inode.i_sb, __func__, - "failed to truncate bmap (ino=%lu, err=%d)", - ii->vfs_inode.i_ino, ret); +failed: + nilfs_warning(ii->vfs_inode.i_sb, __func__, + "failed to truncate bmap (ino=%lu, err=%d)", + ii->vfs_inode.i_ino, ret); } void nilfs_truncate(struct inode *inode) @@ -682,7 +746,7 @@ void nilfs_truncate(struct inode *inode) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_mark_inode_dirty(inode); - nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); + nilfs_set_file_dirty(inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But truncate has no return value. */ @@ -717,26 +781,26 @@ void nilfs_evict_inode(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); + int ret; if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); - end_writeback(inode); + clear_inode(inode); - nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); - atomic_dec(&ii->i_root->inodes_count); + ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); + if (!ret) + atomic64_dec(&ii->i_root->inodes_count); nilfs_clear_inode(inode); @@ -764,9 +828,9 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { - err = vmtruncate(inode, iattr->ia_size); - if (unlikely(err)) - goto out_err; + inode_dio_wait(inode); + truncate_setsize(inode, iattr->ia_size); + nilfs_truncate(inode); } setattr_copy(inode, iattr); @@ -788,28 +852,27 @@ out_err: int nilfs_permission(struct inode *inode, int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; - if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask); } -int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, - struct buffer_head **pbh) +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_inode_info *ii = NILFS_I(inode); int err; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) { - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block(ii->i_root->ifile, inode->i_ino, pbh); if (unlikely(err)) return err; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) ii->i_bh = *pbh; else { @@ -820,36 +883,36 @@ int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, *pbh = ii->i_bh; get_bh(*pbh); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return 0; } int nilfs_inode_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; int ret = 0; if (!list_empty(&ii->i_dirty)) { - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || test_bit(NILFS_I_BUSY, &ii->i_state); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); } return ret; } -int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, - unsigned nr_dirty) +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) { struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); + atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) return 0; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { /* Because this routine may race with nilfs_dispose_list(), @@ -857,35 +920,33 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { /* This will happen when somebody is freeing this inode. */ - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(inode->i_sb, __func__, "cannot get inode (ino=%lu)\n", inode->i_ino); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return -EINVAL; /* NILFS_I_DIRTY may remain for freeing inode */ } - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &sbi->s_dirty_files); + list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); set_bit(NILFS_I_QUEUED, &ii->i_state); } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return 0; } int nilfs_mark_inode_dirty(struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); struct buffer_head *ibh; int err; - err = nilfs_load_inode_block(sbi, inode, &ibh); + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warning(inode->i_sb, __func__, "failed to reget inode block.\n"); return err; } nilfs_update_inode(inode, ibh); - nilfs_mdt_mark_buffer_dirty(ibh); + mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; @@ -901,7 +962,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); @@ -920,3 +981,134 @@ void nilfs_dirty_inode(struct inode *inode) nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ } + +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 logical = 0, phys = 0, size = 0; + __u32 flags = 0; + loff_t isize; + sector_t blkoff, end_blkoff; + sector_t delalloc_blkoff; + unsigned long delalloc_blklen; + unsigned int blkbits = inode->i_blkbits; + int ret, n; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + + blkoff = start >> blkbits; + end_blkoff = (start + len - 1) >> blkbits; + + delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, + &delalloc_blkoff); + + do { + __u64 blkphy; + unsigned int maxblocks; + + if (delalloc_blklen && blkoff == delalloc_blkoff) { + if (size) { + /* End of the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + } + if (blkoff > end_blkoff) + break; + + flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; + logical = blkoff << blkbits; + phys = 0; + size = delalloc_blklen << blkbits; + + blkoff = delalloc_blkoff + delalloc_blklen; + delalloc_blklen = nilfs_find_uncommitted_extent( + inode, blkoff, &delalloc_blkoff); + continue; + } + + /* + * Limit the number of blocks that we look up so as + * not to get into the next delayed allocation extent. + */ + maxblocks = INT_MAX; + if (delalloc_blklen) + maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, + maxblocks); + blkphy = 0; + + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + n = nilfs_bmap_lookup_contig( + NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + + if (n < 0) { + int past_eof; + + if (unlikely(n != -ENOENT)) + break; /* error */ + + /* HOLE */ + blkoff++; + past_eof = ((blkoff << blkbits) >= isize); + + if (size) { + /* End of the current extent */ + + if (past_eof) + flags |= FIEMAP_EXTENT_LAST; + + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + size = 0; + } + if (blkoff > end_blkoff || past_eof) + break; + } else { + if (size) { + if (phys && blkphy << blkbits == phys + size) { + /* The current extent goes on */ + size += n << blkbits; + } else { + /* Terminate the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, + flags); + if (ret || blkoff > end_blkoff) + break; + + /* Start another extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + } else { + /* Start a new extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + blkoff += n; + } + cond_resched(); + } while (true); + + /* If ret is 1 then we just hit the end of the extent array */ + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; +} diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3e90f86d5bf..422fb54b737 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -26,7 +26,9 @@ #include <linux/capability.h> /* capable() */ #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ #include <linux/vmalloc.h> -#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */ +#include <linux/compat.h> /* compat_ptr() */ +#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ +#include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> #include "nilfs.h" #include "segment.h" @@ -35,7 +37,26 @@ #include "sufile.h" #include "dat.h" - +/** + * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @dir: set of direction flags + * @dofunc: concrete function of get/set metadata info + * + * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of + * calling dofunc() function on the basis of @argv argument. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, @@ -55,6 +76,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_size > PAGE_SIZE) return -EINVAL; + /* + * Reject pairs of a start item position (argv->v_index) and a + * total count (argv->v_nmembs) which leads position 'pos' to + * overflow by the increment at the end of the loop. + */ + if (argv->v_index > ~(__u64)0 - argv->v_nmembs) + return -EINVAL; + buf = (void *)__get_free_pages(GFP_NOFS, 0); if (unlikely(!buf)) return -ENOMEM; @@ -97,11 +126,102 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, return ret; } +/** + * nilfs_ioctl_getflags - ioctl to support lsattr + */ +static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp) +{ + unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE; + + return put_user(flags, (int __user *)argp); +} + +/** + * nilfs_ioctl_setflags - ioctl to support chattr + */ +static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, + void __user *argp) +{ + struct nilfs_transaction_info ti; + unsigned int flags, oldflags; + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(flags, (int __user *)argp)) + return -EFAULT; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + flags = nilfs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = NILFS_I(inode)->i_flags; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by the + * relevant capability. + */ + ret = -EPERM; + if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + !capable(CAP_LINUX_IMMUTABLE)) + goto out; + + ret = nilfs_transaction_begin(inode->i_sb, &ti, 0); + if (ret) + goto out; + + NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) | + (flags & FS_FL_USER_MODIFIABLE); + + nilfs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + if (IS_SYNC(inode)) + nilfs_set_transaction_flag(NILFS_TI_SYNC); + + nilfs_mark_inode_dirty(inode); + ret = nilfs_transaction_commit(inode->i_sb); +out: + mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); + return ret; +} + +/** + * nilfs_ioctl_getversion - get info about a file's version (generation number) + */ +static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) +{ + return put_user(inode->i_generation, (int __user *)argp); +} + +/** + * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot) + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_change_cpmode() function changes mode of + * given checkpoint between checkpoint and snapshot state. This ioctl + * is used in chcp and mkcp utilities. + * + * Return Value: On success, 0 is returned and mode of a checkpoint is + * changed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint mode changing. + */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct inode *cpfile = nilfs->ns_cpfile; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_transaction_info ti; struct nilfs_cpmode cpmode; int ret; @@ -109,7 +229,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write(filp->f_path.mnt); + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -117,27 +237,46 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - down_read(&inode->i_sb->s_umount); + mutex_lock(&nilfs->ns_snapshot_mount_mutex); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( - cpfile, cpmode.cm_cno, cpmode.cm_mode); + nilfs->ns_cpfile, cpmode.cm_cno, cpmode.cm_mode); if (unlikely(ret < 0)) nilfs_transaction_abort(inode->i_sb); else nilfs_transaction_commit(inode->i_sb); /* never fails */ - up_read(&inode->i_sb->s_umount); + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); out: - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_delete_checkpoint - remove checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_delete_checkpoint() function removes + * checkpoint from NILFS2 file system. This ioctl is used in rmcp + * utility. + * + * Return Value: On success, 0 is returned and a checkpoint is + * removed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint removing. + */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_transaction_info ti; __u64 cno; int ret; @@ -145,7 +284,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write(filp->f_path.mnt); + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -154,16 +293,31 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, goto out; nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); + ret = nilfs_cpfile_delete_checkpoint(nilfs->ns_cpfile, cno); if (unlikely(ret < 0)) nilfs_transaction_abort(inode->i_sb); else nilfs_transaction_commit(inode->i_sb); /* never fails */ out: - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints + * @nilfs: nilfs object + * @posp: pointer on array of checkpoint's numbers + * @flags: checkpoint mode (checkpoint or snapshot) + * @buf: buffer for storing checkponts' info + * @size: size in bytes of one checkpoint info item in array + * @nmembs: number of checkpoints in array (numbers and infos) + * + * Description: nilfs_ioctl_do_get_cpinfo() function returns info about + * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in + * lscp utility and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_cpinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -177,10 +331,31 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_cpstat - get checkpoints statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. + * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and checkpoints information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting checkpoints statistics. + */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_cpstat cpstat; int ret; @@ -195,6 +370,21 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info + * @nilfs: nilfs object + * @posp: pointer on array of segment numbers + * @flags: *not used* + * @buf: buffer for storing suinfo array + * @size: size in bytes of one suinfo item in array + * @nmembs: count of segment numbers and suinfos in array + * + * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage + * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used + * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_suinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -208,10 +398,31 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_sustat - get segment usage statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. + * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and segment usage information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting segment usage statistics. + */ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_sustat sustat; int ret; @@ -226,6 +437,21 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_vinfo structures + * @size: size in bytes of one vinfo item in array + * @nmembs: count of vinfos in array + * + * Description: nilfs_ioctl_do_get_vinfo() function returns information + * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used + * by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_vinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -233,17 +459,31 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); + ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } +/** + * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_bdesc structures + * @size: size in bytes of one bdesc item in array + * @nmembs: count of bdescs in array + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_bdescs structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -265,10 +505,33 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, return nmembs; } +/** + * nilfs_ioctl_get_bdescs - get disk block descriptors + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and disk block descriptors are + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting disk block descriptors. + */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_argv argv; int ret; @@ -288,6 +551,26 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC + * @inode: inode object + * @vdesc: descriptor of virtual block number + * @buffers: list of moving buffers + * + * Description: nilfs_ioctl_move_inode_block() function registers data/node + * buffer in the GC pagecache and submit read request. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Requested block doesn't exist. + * + * %-EEXIST - Blocks conflict is detected. + */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, struct list_head *buffers) @@ -333,10 +616,24 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } +/** + * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection + * @sb: superblock object + * @argv: vector of arguments from userspace + * @buf: array of nilfs_vdesc structures + * + * Description: nilfs_ioctl_move_blocks() function reads valid data/node + * blocks that garbage collector specified with the array of nilfs_vdesc + * structures and stores them into page caches of GC inodes. + * + * Return Value: Number of processed nilfs_vdesc structures or + * error code, otherwise. + */ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; + struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -349,10 +646,21 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, ino = vdesc->vd_ino; cno = vdesc->vd_cno; inode = nilfs_iget_for_gc(sb, ino, cno); - if (unlikely(inode == NULL)) { - ret = -ENOMEM; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; } + if (list_empty(&NILFS_I(inode)->i_dirty)) { + /* + * Add the inode to GC inode list. Garbage Collection + * is serialized and no two processes manipulate the + * list simultaneously. + */ + igrab(inode); + list_add(&NILFS_I(inode)->i_dirty, + &nilfs->ns_gc_inodes); + } + do { ret = nilfs_ioctl_move_inode_block(inode, vdesc, &buffers); @@ -386,6 +694,25 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, return ret; } +/** + * nilfs_ioctl_delete_checkpoints - delete checkpoints + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of periods of checkpoints numbers + * + * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints + * in the period from p_start to p_end, excluding p_end itself. The checkpoints + * which have been already deleted are ignored. + * + * Return Value: Number of processed nilfs_period structures or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - invalid checkpoints. + */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -403,23 +730,58 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, return nmembs; } +/** + * nilfs_ioctl_free_vblocknrs - free virtual block numbers + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of virtual block numbers + * + * Description: nilfs_ioctl_free_vblocknrs() function frees + * the virtual block numbers specified by @buf and @argv->v_nmembs. + * + * Return Value: Number of processed virtual block numbers or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The virtual block number have not been allocated. + */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; int ret; - ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs); return (ret < 0) ? ret : nmembs; } +/** + * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of block descriptors + * + * Description: nilfs_ioctl_mark_blocks_dirty() function marks + * metadata file or data blocks as dirty. + * + * Return Value: Number of processed block descriptors or + * error code, otherwise. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-ENOENT - the specified block does not exist (hole block) + */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -438,7 +800,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, /* skip dead block */ continue; if (bdescs[i].bd_level == 0) { - ret = nilfs_mdt_mark_block_dirty(dat, + ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat, bdescs[i].bd_offset); if (ret < 0) { WARN_ON(ret == -ENOENT); @@ -496,6 +858,20 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, return ret; } +/** + * nilfs_ioctl_clean_segments - clean segments + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_clean_segments() function makes garbage + * collection operation in the environment of requested parameters + * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by + * nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -516,7 +892,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write(filp->f_path.mnt); + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -528,6 +904,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. We @@ -540,7 +918,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = PTR_ERR(kbufs[4]); goto out; } - nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + nilfs = inode->i_sb->s_fs_info; for (n = 0; n < 4; n++) { ret = -EINVAL; @@ -550,6 +928,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) goto out_free; + if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size) + goto out_free; + len = argv[n].v_size * argv[n].v_nmembs; base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { @@ -580,14 +961,15 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; } - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); - else + else { + if (nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + } nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); @@ -597,10 +979,37 @@ out_free: vfree(kbufs[n]); kfree(kbufs[4]); out: - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_sync - make a checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_sync() function constructs a logical segment + * for checkpointing. This function guarantees that all modified data + * and metadata are written out to the device when it successfully + * returned. + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -612,8 +1021,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, if (ret < 0) return ret; + nilfs = inode->i_sb->s_fs_info; + if (nilfs_test_opt(nilfs, BARRIER)) { + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (ret == -EIO) + return ret; + } + if (argp != NULL) { - nilfs = NILFS_SB(inode->i_sb)->s_nilfs; down_read(&nilfs->ns_segctor_sem); cno = nilfs->ns_cno - 1; up_read(&nilfs->ns_segctor_sem); @@ -623,6 +1038,146 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +/** + * nilfs_ioctl_resize - resize NILFS2 volume + * @inode: inode object + * @filp: file object + * @argp: pointer on argument from userspace + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ +static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, + void __user *argp) +{ + __u64 newsize; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = mnt_want_write_file(filp); + if (ret) + goto out; + + ret = -EFAULT; + if (copy_from_user(&newsize, argp, sizeof(newsize))) + goto out_drop_write; + + ret = nilfs_resize_fs(inode->i_sb, newsize); + +out_drop_write: + mnt_drop_write_file(filp); +out: + return ret; +} + +/** + * nilfs_ioctl_trim_fs() - trim ioctl handle function + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Decription: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It + * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which + * performs the actual trim operation. + * + * Return Value: On success, 0 is returned or negative error code, otherwise. + */ +static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct request_queue *q = bdev_get_queue(nilfs->ns_bdev); + struct fstrim_range range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + + range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity); + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); + up_read(&nilfs->ns_segctor_sem); + + if (ret < 0) + return ret; + + if (copy_to_user(argp, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + +/** + * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Decription: nilfs_ioctl_set_alloc_range() function defines lower limit + * of segments in bytes and upper limit of segments in bytes. + * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ +static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 range[2]; + __u64 minseg, maxseg; + unsigned long segbytes; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EFAULT; + if (copy_from_user(range, argp, sizeof(__u64[2]))) + goto out; + + ret = -ERANGE; + if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + do_div(maxseg, segbytes); + maxseg--; + + ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); +out: + return ret; +} + +/** + * nilfs_ioctl_get_info - wrapping function of get metadata info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * @membsz: size of an item in bytes + * @dofunc: concrete function of getting metadata info + * + * Description: nilfs_ioctl_get_info() gets metadata info by means of + * calling dofunc() function. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -631,7 +1186,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, void *, size_t, size_t)) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_argv argv; int ret; @@ -650,12 +1205,107 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_set_suinfo - set segment usage info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: Expects an array of nilfs_suinfo_update structures + * encapsulated in nilfs_argv and updates the segment usage info + * according to the flags in nilfs_suinfo_update. + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EPERM - Not enough permissions + * + * %-EFAULT - Error copying input data + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + */ +static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct nilfs_transaction_info ti; + struct nilfs_argv argv; + size_t len; + void __user *base; + void *kbuf; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_from_user(&argv, argp, sizeof(argv))) + goto out; + + ret = -EINVAL; + if (argv.v_size < sizeof(struct nilfs_suinfo_update)) + goto out; + + if (argv.v_nmembs > nilfs->ns_nsegments) + goto out; + + if (argv.v_nmembs >= UINT_MAX / argv.v_size) + goto out; + + len = argv.v_size * argv.v_nmembs; + if (!len) { + ret = 0; + goto out; + } + + base = (void __user *)(unsigned long)argv.v_base; + kbuf = vmalloc(len); + if (!kbuf) { + ret = -ENOMEM; + goto out; + } + + if (copy_from_user(kbuf, base, len)) { + ret = -EFAULT; + goto out_free; + } + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, + argv.v_nmembs); + if (unlikely(ret < 0)) + nilfs_transaction_abort(inode->i_sb); + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ + +out_free: + vfree(kbuf); +out: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); void __user *argp = (void __user *)arg; switch (cmd) { + case FS_IOC_GETFLAGS: + return nilfs_ioctl_getflags(inode, argp); + case FS_IOC_SETFLAGS: + return nilfs_ioctl_setflags(inode, filp, argp); + case FS_IOC_GETVERSION: + return nilfs_ioctl_getversion(inode, argp); case NILFS_IOCTL_CHANGE_CPMODE: return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); case NILFS_IOCTL_DELETE_CHECKPOINT: @@ -670,6 +1320,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_get_info(inode, filp, cmd, argp, sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); + case NILFS_IOCTL_SET_SUINFO: + return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: @@ -682,7 +1334,48 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); + case NILFS_IOCTL_RESIZE: + return nilfs_ioctl_resize(inode, filp, argp); + case NILFS_IOCTL_SET_ALLOC_RANGE: + return nilfs_ioctl_set_alloc_range(inode, argp); + case FITRIM: + return nilfs_ioctl_trim_fs(inode, argp); default: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + case NILFS_IOCTL_CHANGE_CPMODE: + case NILFS_IOCTL_DELETE_CHECKPOINT: + case NILFS_IOCTL_GET_CPINFO: + case NILFS_IOCTL_GET_CPSTAT: + case NILFS_IOCTL_GET_SUINFO: + case NILFS_IOCTL_SET_SUINFO: + case NILFS_IOCTL_GET_SUSTAT: + case NILFS_IOCTL_GET_VINFO: + case NILFS_IOCTL_GET_BDESCS: + case NILFS_IOCTL_CLEAN_SEGMENTS: + case NILFS_IOCTL_SYNC: + case NILFS_IOCTL_RESIZE: + case NILFS_IOCTL_SET_ALLOC_RANGE: + case FITRIM: + break; + default: + return -ENOIOCTLCMD; + } + return nilfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 39a5b84e2c9..c4dcd1db57e 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -58,15 +58,15 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_buffer_uptodate(bh); - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); return 0; } @@ -237,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * * %-ENOENT - the specified block does not exist (hole block) * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) - * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, @@ -273,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -350,8 +346,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) { @@ -361,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); brelse(bh); return 0; @@ -381,14 +375,25 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode; + struct inode *inode = page->mapping->host; struct super_block *sb; int err = 0; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); - inode = page->mapping->host; if (!inode) return 0; @@ -405,7 +410,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) static const struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, - .sync_page = block_sync_page, }; static const struct inode_operations def_mdt_iops; @@ -444,10 +448,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } -static const struct address_space_operations shadow_map_aops = { - .sync_page = block_sync_page, -}; - /** * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file @@ -460,10 +460,10 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct backing_dev_info *bdi = inode->i_sb->s_bdi; INIT_LIST_HEAD(&shadow->frozen_buffers); - nilfs_mapping_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); - nilfs_mapping_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); + address_space_init_once(&shadow->frozen_data); + nilfs_mapping_init(&shadow->frozen_data, inode, bdi); + address_space_init_once(&shadow->frozen_btnodes); + nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); mi->mi_shadow = shadow; return 0; } @@ -499,31 +499,29 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct buffer_head *bh_frozen; struct page *page; int blkbits = inode->i_blkbits; - int ret = -ENOMEM; page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); if (!page) - return ret; + return -ENOMEM; if (!page_has_buffers(page)) create_empty_buffers(page, 1 << blkbits, 0); bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); - if (bh_frozen) { - if (!buffer_uptodate(bh_frozen)) - nilfs_copy_buffer(bh_frozen, bh); - if (list_empty(&bh_frozen->b_assoc_buffers)) { - list_add_tail(&bh_frozen->b_assoc_buffers, - &shadow->frozen_buffers); - set_buffer_nilfs_redirected(bh); - } else { - brelse(bh_frozen); /* already frozen */ - } - ret = 0; + + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ } + unlock_page(page); page_cache_release(page); - return ret; + return 0; } struct buffer_head * @@ -574,10 +572,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_clear_dirty_pages(&ii->i_btnode_cache); + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index b13734bf352..ab172e8549c 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -28,6 +28,13 @@ #include "nilfs.h" #include "page.h" +/** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state + * @frozen_data: shadowed dirty data pages + * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @frozen_buffers: list of frozen buffers + */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; struct address_space frozen_data; @@ -64,11 +71,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) return inode->i_private; } -static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) -{ - return NILFS_SB(inode->i_sb)->s_nilfs; -} - /* Default GFP flags using highmem */ #define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) @@ -93,8 +95,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh); -#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) - static inline void nilfs_mdt_mark_dirty(struct inode *inode) { if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) @@ -108,7 +108,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) static inline __u64 nilfs_mdt_cno(struct inode *inode) { - return NILFS_I_NILFS(inode)->ns_cno; + return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; } #define nilfs_mdt_bgl_lock(inode, bg) \ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 6e9557ecf16..9de78f08989 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) */ static struct dentry * -nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; @@ -72,12 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return ERR_PTR(-ENAMETOOLONG); ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = NULL; - if (ino) { - inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } + inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; return d_splice_alias(inode, dentry); } @@ -89,8 +84,8 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) { struct inode *inode; struct nilfs_transaction_info ti; @@ -117,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, } static int -nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; struct nilfs_transaction_info ti; @@ -198,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, struct nilfs_transaction_info ti; int err; - if (inode->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; @@ -218,15 +210,12 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; struct nilfs_transaction_info ti; int err; - if (dir->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; @@ -294,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) nilfs_warning(inode->i_sb, __func__, "deleting nonexistent file (%lu), %d\n", inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; + set_nlink(inode, 1); } err = nilfs_delete_entry(de, page); if (err) @@ -397,7 +386,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; - inc_nlink(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); nilfs_mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; @@ -406,18 +394,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); nilfs_mark_inode_dirty(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= NILFS_LINK_MAX) - goto out_dir; - } - inc_nlink(old_inode); err = nilfs_add_link(new_dentry, old_inode); - if (err) { - drop_nlink(old_inode); - nilfs_mark_inode_dirty(old_inode); + if (err) goto out_dir; - } if (dir_de) { inc_nlink(new_dir); nilfs_mark_inode_dirty(new_dir); @@ -431,7 +410,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; nilfs_delete_entry(old_de, old_page); - drop_nlink(old_inode); if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); @@ -463,7 +441,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child) { unsigned long ino; struct inode *inode; - struct qstr dotdot = {.name = "..", .len = 2}; + struct qstr dotdot = QSTR_INIT("..", 2); struct nilfs_root *root; ino = nilfs_inode_by_name(child->d_inode, &dotdot); @@ -488,7 +466,7 @@ static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) return ERR_PTR(-ESTALE); - root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + root = nilfs_lookup_root(sb->s_fs_info, cno); if (!root) return ERR_PTR(-ESTALE); @@ -530,31 +508,29 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); } -static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, - int connectable) +static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - struct inode *inode = dentry->d_inode; struct nilfs_root *root = NILFS_I(inode)->i_root; int type; - if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || - (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) - return 255; + if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_CONNECTABLE; + return FILEID_INVALID; + } + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + return FILEID_INVALID; + } fid->cno = root->cno; fid->ino = inode->i_ino; fid->gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->parent_ino = parent->i_ino; fid->parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); - type = FILEID_NILFS_WITH_PARENT; *lenp = NILFS_FID_SIZE_CONNECTABLE; } else { @@ -577,6 +553,7 @@ const struct inode_operations nilfs_dir_inode_operations = { .rename = nilfs_rename, .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; const struct inode_operations nilfs_special_inode_operations = { diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f7560da5a56..9bc72dec3fa 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -30,11 +30,23 @@ #include <linux/blkdev.h> #include <linux/nilfs2_fs.h> #include "the_nilfs.h" -#include "sb.h" #include "bmap.h" -/* - * nilfs inode data in memory +/** + * struct nilfs_inode_info - nilfs inode data in memory + * @i_flags: inode flags + * @i_state: dynamic state flags + * @i_bmap: pointer on i_bmap_data + * @i_bmap_data: raw block mapping + * @i_xattr: <TODO> + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode + * @i_btnode_cache: cached pages of b-tree nodes + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode + * @i_root: root object of the current filesystem tree + * @vfs_inode: VFS inode object */ struct nilfs_inode_info { __u32 i_flags; @@ -81,12 +93,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) return &ii->vfs_inode; } -static inline struct inode *NILFS_AS_I(struct address_space *mapping) -{ - return (mapping->host) ? : - container_of(mapping, struct inode, i_data); -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -115,19 +121,19 @@ enum { * Macros to check inode numbers */ #define NILFS_MDT_INO_BITS \ - ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ - 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ - 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) + ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ + 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ + 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) #define NILFS_SYS_INO_BITS \ - ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) + ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) -#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) +#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) + ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) + ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) /** * struct nilfs_transaction_info: context information for synchronization @@ -190,11 +196,6 @@ static inline int nilfs_doing_construction(void) return nilfs_test_transaction_flag(NILFS_TI_WRITER); } -static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) -{ - return nilfs->ns_dat; -} - /* * function prototype */ @@ -217,6 +218,23 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) #define NILFS_ATIME_DISABLE +/* Flags that should be inherited by new inodes from their parent. */ +#define NILFS_FL_INHERITED \ + (FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL | \ + FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\ + FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL); + else + return flags & (FS_NODUMP_FL | FS_NOATIME_FL); +} + /* dir.c */ extern int nilfs_add_link(struct dentry *, struct inode *); extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); @@ -230,15 +248,18 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, struct page *, struct inode *); /* file.c */ -extern int nilfs_sync_file(struct file *, int); +extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); +long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, void **); /* inode.c */ -extern struct inode *nilfs_new_inode(struct inode *, int); +void nilfs_inode_add_blocks(struct inode *inode, int n); +void nilfs_inode_sub_blocks(struct inode *inode, int n); +extern struct inode *nilfs_new_inode(struct inode *, umode_t); extern void nilfs_free_inode(struct inode *); extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); @@ -256,22 +277,23 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); +extern void nilfs_write_failed(struct address_space *mapping, loff_t to); int nilfs_permission(struct inode *inode, int mask); -extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, - struct buffer_head **); +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); -extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, - unsigned); +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); +extern void nilfs_dirty_inode(struct inode *, int flags); +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); -extern void nilfs_error(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void nilfs_warning(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void nilfs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void nilfs_warning(struct super_block *, const char *, const char *, ...); extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, @@ -280,11 +302,12 @@ extern int nilfs_check_feature_compatibility(struct super_block *, struct nilfs_super_block *); extern void nilfs_set_log_cursor(struct nilfs_super_block *, struct the_nilfs *); -extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, - int flip); -extern int nilfs_commit_super(struct nilfs_sb_info *, int); -extern int nilfs_cleanup_super(struct nilfs_sb_info *); -int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, +struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, + int flip); +int nilfs_commit_super(struct super_block *sb, int flag); +int nilfs_cleanup_super(struct super_block *sb); +int nilfs_resize_fs(struct super_block *sb, __u64 newsize); +int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **root); int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index a6c3c2e817f..da276640f77 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -37,8 +37,7 @@ #define NILFS_BUFFER_INHERENT_BITS \ ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ - (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ - (1UL << BH_NILFS_Checked)) + (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked)) static struct buffer_head * __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, @@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, return bh; } -/* - * Since the page cache of B-tree node pages or data page cache of pseudo - * inodes does not have a valid mapping->host pointer, calling - * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; - * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). - * To avoid this problem, the old style mark_buffer_dirty() is used instead. - */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh) -{ - if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) - __set_page_dirty_nobuffers(bh->b_page); -} - struct buffer_head *nilfs_grab_buffer(struct inode *inode, struct address_space *mapping, unsigned long blkoff, @@ -108,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_redirected(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -133,11 +120,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) struct page *spage = sbh->b_page, *dpage = dbh->b_page; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage, KM_USER0); - kaddr1 = kmap_atomic(dpage, KM_USER1); + kaddr0 = kmap_atomic(spage); + kaddr1 = kmap_atomic(dpage); memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1, KM_USER1); - kunmap_atomic(kaddr0, KM_USER0); + kunmap_atomic(kaddr1); + kunmap_atomic(kaddr0); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; @@ -183,7 +170,7 @@ int nilfs_page_buffers_clean(struct page *page) void nilfs_page_bug(struct page *page) { struct address_space *m; - unsigned long ino = 0; + unsigned long ino; if (unlikely(!page)) { printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); @@ -191,11 +178,8 @@ void nilfs_page_bug(struct page *page) } m = page->mapping; - if (m) { - struct inode *inode = NILFS_AS_I(m); - if (inode != NULL) - ino = inode->i_ino; - } + ino = m ? m->host->i_ino : 0; + printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", page, atomic_read(&page->_count), @@ -217,56 +201,6 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_alloc_private_page - allocate a private page with buffer heads - * - * Return Value: On success, a pointer to the allocated page is returned. - * On error, NULL is returned. - */ -struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, - unsigned long state) -{ - struct buffer_head *bh, *head, *tail; - struct page *page; - - page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ - if (unlikely(!page)) - return NULL; - - lock_page(page); - head = alloc_page_buffers(page, size, 0); - if (unlikely(!head)) { - unlock_page(page); - __free_page(page); - return NULL; - } - - bh = head; - do { - bh->b_state = (1UL << BH_NILFS_Allocated) | state; - tail = bh; - bh->b_bdev = bdev; - bh = bh->b_this_page; - } while (bh); - - tail->b_this_page = head; - attach_page_buffers(page, head); - - return page; -} - -void nilfs_free_private_page(struct page *page) -{ - BUG_ON(!PageLocked(page)); - BUG_ON(page->mapping); - - if (page_has_buffers(page) && !try_to_free_buffers(page)) - NILFS_PAGE_BUG(page, "failed to free page"); - - unlock_page(page); - __free_page(page); -} - -/** * nilfs_copy_page -- copy the page with buffers * @dst: destination page * @src: source page @@ -437,7 +371,12 @@ repeat: goto repeat; } -void nilfs_clear_dirty_pages(struct address_space *mapping) +/** + * nilfs_clear_dirty_pages - discard dirty pages in address space + * @mapping: address space with dirty pages for discarding + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) { struct pagevec pvec; unsigned int i; @@ -449,25 +388,9 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) PAGEVEC_SIZE)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - struct buffer_head *bh, *head; lock_page(page); - ClearPageUptodate(page); - ClearPageMappedToDisk(page); - bh = head = page_buffers(page); - do { - lock_buffer(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_checked(bh); - clear_buffer_nilfs_redirected(bh); - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); - unlock_buffer(bh); - bh = bh->b_this_page; - } while (bh != head); - - __nilfs_clear_page_dirty(page); + nilfs_clear_dirty_page(page, silent); unlock_page(page); } pagevec_release(&pvec); @@ -475,6 +398,52 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) } } +/** + * nilfs_clear_dirty_page - discard dirty page + * @page: dirty page that will be discarded + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_page(struct page *page, bool silent) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + + BUG_ON(!PageLocked(page)); + + if (!silent) { + nilfs_warning(sb, __func__, + "discard page: offset %lld, ino %lu", + page_offset(page), inode->i_ino); + } + + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + lock_buffer(bh); + if (!silent) { + nilfs_warning(sb, __func__, + "discard block %llu, size %zu", + (u64)bh->b_blocknr, bh->b_size); + } + clear_buffer_async_write(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + unlock_buffer(bh); + } while (bh = bh->b_this_page, bh != head); + } + + __nilfs_clear_page_dirty(page); +} + unsigned nilfs_page_count_clean_buffers(struct page *page, unsigned from, unsigned to) { @@ -491,30 +460,16 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, } return nc; } - -void nilfs_mapping_init_once(struct address_space *mapping) -{ - memset(mapping, 0, sizeof(*mapping)); - INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); - spin_lock_init(&mapping->tree_lock); - INIT_LIST_HEAD(&mapping->private_list); - spin_lock_init(&mapping->private_lock); - - spin_lock_init(&mapping->i_mmap_lock); - INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); - INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); -} -void nilfs_mapping_init(struct address_space *mapping, - struct backing_dev_info *bdi, - const struct address_space_operations *aops) +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, + struct backing_dev_info *bdi) { - mapping->host = NULL; + mapping->host = inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->assoc_mapping = NULL; + mapping->private_data = NULL; mapping->backing_dev_info = bdi; - mapping->a_ops = aops; + mapping->a_ops = &empty_aops; } /* @@ -546,3 +501,87 @@ int __nilfs_clear_page_dirty(struct page *page) } return TestClearPageDirty(page); } + +/** + * nilfs_find_uncommitted_extent - find extent of uncommitted data + * @inode: inode + * @start_blk: start block offset (in) + * @blkoff: start offset of the found extent (out) + * + * This function searches an extent of buffers marked "delayed" which + * starts from a block offset equal to or larger than @start_blk. If + * such an extent was found, this will store the start offset in + * @blkoff and return its length in blocks. Otherwise, zero is + * returned. + */ +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff) +{ + unsigned int i; + pgoff_t index; + unsigned int nblocks_in_page; + unsigned long length = 0; + sector_t b; + struct pagevec pvec; + struct page *page; + + if (inode->i_mapping->nrpages == 0) + return 0; + + index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + pagevec_init(&pvec, 0); + +repeat: + pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, + pvec.pages); + if (pvec.nr == 0) + return length; + + if (length > 0 && pvec.pages[0]->index > index) + goto out; + + b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + i = 0; + do { + page = pvec.pages[i]; + + lock_page(page); + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (b < start_blk) + continue; + if (buffer_delay(bh)) { + if (length == 0) + *blkoff = b; + length++; + } else if (length > 0) { + goto out_locked; + } + } while (++b, bh = bh->b_this_page, bh != head); + } else { + if (length > 0) + goto out_locked; + + b += nblocks_in_page; + } + unlock_page(page); + + } while (++i < pagevec_count(&pvec)); + + index = page->index + 1; + pagevec_release(&pvec); + cond_resched(); + goto repeat; + +out_locked: + unlock_page(page); +out: + pagevec_release(&pvec); + return length; +} diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index fb9e8a8a203..ef30c5c2426 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -38,14 +38,12 @@ enum { BH_NILFS_Redirected, }; -BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ BUFFER_FNS(NILFS_Volatile, nilfs_volatile) BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh); int __nilfs_clear_page_dirty(struct page *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, @@ -54,18 +52,17 @@ void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); int nilfs_page_buffers_clean(struct page *); void nilfs_page_bug(struct page *); -struct page *nilfs_alloc_private_page(struct block_device *, int, - unsigned long); -void nilfs_free_private_page(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_pages(struct address_space *); -void nilfs_mapping_init_once(struct address_space *mapping); -void nilfs_mapping_init(struct address_space *mapping, - struct backing_dev_info *bdi, - const struct address_space_operations *aops); +void nilfs_clear_dirty_page(struct page *, bool); +void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, + struct backing_dev_info *bdi); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff); #define NILFS_PAGE_BUG(page, m, a...) \ do { nilfs_page_bug(page); BUG(); } while (0) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 5d2711c28da..ff00a0b7acb 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, static void dispose_recovery_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_recovery_block *rb - = list_entry(head->next, - struct nilfs_recovery_block, list); + struct nilfs_recovery_block *rb; + + rb = list_first_entry(head, struct nilfs_recovery_block, list); list_del(&rb->list); kfree(rb); } @@ -416,16 +416,16 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum) void nilfs_dispose_segment_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_segment_entry *ent - = list_entry(head->next, - struct nilfs_segment_entry, list); + struct nilfs_segment_entry *ent; + + ent = list_first_entry(head, struct nilfs_segment_entry, list); list_del(&ent->list); kfree(ent); } } static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, struct nilfs_recovery_info *ri) { struct list_head *head = &ri->ri_used_segments; @@ -493,15 +493,15 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh_org); return 0; } static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, struct nilfs_root *root, struct list_head *head, unsigned long *nr_salvaged_blocks) @@ -514,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, int err = 0, err2 = 0; list_for_each_entry_safe(rb, n, head, list) { - inode = nilfs_iget(sbi->s_super, root, rb->ino); + inode = nilfs_iget(sb, root, rb->ino); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; @@ -527,7 +527,8 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, if (unlikely(err)) { loff_t isize = inode->i_size; if (pos + blocksize > isize) - vmtruncate(inode, isize); + nilfs_write_failed(inode->i_mapping, + pos + blocksize); goto failed_inode; } @@ -535,7 +536,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, if (unlikely(err)) goto failed_page; - err = nilfs_set_file_dirty(sbi, inode, 1); + err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) goto failed_page; @@ -572,11 +573,11 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, * nilfs_do_roll_forward - salvage logical segments newer than the latest * checkpoint * @nilfs: nilfs object - * @sbi: nilfs_sb_info + * @sb: super block instance * @ri: pointer to a nilfs_recovery_info */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, struct nilfs_root *root, struct nilfs_recovery_info *ri) { @@ -648,7 +649,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, goto failed; if (flags & NILFS_SS_LOGEND) { err = nilfs_recover_dsync_blocks( - nilfs, sbi, root, &dsync_blocks, + nilfs, sb, root, &dsync_blocks, &nsalvaged_blocks); if (unlikely(err)) goto failed; @@ -681,7 +682,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, if (nsalvaged_blocks) { printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", - sbi->s_super->s_id, nsalvaged_blocks); + sb->s_id, nsalvaged_blocks); ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: @@ -695,7 +696,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, printk(KERN_ERR "NILFS (device %s): Error roll-forwarding " "(err=%d, pseg block=%llu). ", - sbi->s_super->s_id, err, (unsigned long long)pseg_start); + sb->s_id, err, (unsigned long long)pseg_start); goto out; } @@ -724,7 +725,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, /** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object - * @sbi: nilfs_sb_info + * @sb: super block instance * @ri: pointer to a nilfs_recovery_info struct to store search results. * * Return Value: On success, 0 is returned. On error, one of the following @@ -741,7 +742,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, * %-ENOMEM - Insufficient memory available. */ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, struct nilfs_recovery_info *ri) { struct nilfs_root *root; @@ -750,32 +751,32 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) return 0; - err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); + err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading the latest checkpoint.\n"); return err; } - err = nilfs_do_roll_forward(nilfs, sbi, root, ri); + err = nilfs_do_roll_forward(nilfs, sb, root, ri); if (unlikely(err)) goto failed; if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { - err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); + err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: Error preparing segments for " "recovery.\n"); goto failed; } - err = nilfs_attach_segment_constructor(sbi, root); + err = nilfs_attach_log_writer(sb, root); if (unlikely(err)) goto failed; set_nilfs_discontinued(nilfs); - err = nilfs_construct_segment(sbi->s_super); - nilfs_detach_segment_constructor(sbi); + err = nilfs_construct_segment(sb); + nilfs_detach_log_writer(sb); if (unlikely(err)) { printk(KERN_ERR "NILFS: Oops! recovery failed. " diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h deleted file mode 100644 index 35a07157b98..00000000000 --- a/fs/nilfs2/sb.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * sb.h - NILFS on-memory super block structure. - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> - * - */ - -#ifndef _NILFS_SB -#define _NILFS_SB - -#include <linux/types.h> -#include <linux/fs.h> - -/* - * Mount options - */ -struct nilfs_mount_options { - unsigned long mount_opt; - __u64 snapshot_cno; -}; - -struct the_nilfs; -struct nilfs_sc_info; - -/* - * NILFS super-block data in memory - */ -struct nilfs_sb_info { - /* Mount options */ - unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; - - unsigned long s_interval; /* construction interval */ - unsigned long s_watermark; /* threshold of data amount - for the segment construction */ - - /* Fundamental members */ - struct super_block *s_super; /* reverse pointer to super_block */ - struct the_nilfs *s_nilfs; - - /* Segment constructor */ - struct list_head s_dirty_files; /* dirty files list */ - struct nilfs_sc_info *s_sc_info; /* segment constructor info */ - spinlock_t s_inode_lock; /* Lock for the nilfs inode. - It covers s_dirty_files list */ - - /* Inode allocator */ - spinlock_t s_next_gen_lock; - u32 s_next_generation; -}; - -static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi) -{ - return sbi->s_sc_info; -} - -/* - * Bit operations for the mount option - */ -#define nilfs_clear_opt(sbi, opt) \ - do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) -#define nilfs_set_opt(sbi, opt) \ - do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0) -#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(sbi, mask, opt) \ - do { (sbi)->s_mount_opt = \ - (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt); \ - } while (0) - -#endif /* _NILFS_SB */ diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 0f83e93935b..dc3a9efdaab 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } raw_sum->ss_datasum = cpu_to_le32(crc); } @@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct nilfs_super_root *raw_sr; + struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; + unsigned srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; + srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), - NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + srsize - sizeof(raw_sr->sr_sum)); raw_sr->sr_sum = cpu_to_le32(crc); } @@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list) list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { list_del_init(&bh->b_assoc_buffers); - if (buffer_nilfs_allocated(bh)) { - struct page *clone_page = bh->b_page; - - /* remove clone page */ - brelse(bh); - page_cache_release(clone_page); /* for each bh */ - if (page_count(clone_page) <= 2) { - lock_page(clone_page); - nilfs_free_private_page(clone_page); - } - continue; - } brelse(bh); } } @@ -354,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) @@ -386,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; @@ -426,7 +416,8 @@ static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, } if (likely(bio)) { bio->bi_bdev = nilfs->ns_bdev; - bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9); + bio->bi_iter.bi_sector = + start << (nilfs->ns_blocksize_bits - 9); } return bio; } @@ -509,7 +500,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= REQ_SYNC | REQ_UNPLUG; + rw |= REQ_SYNC; res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 687d090cea3..a1a191634ab 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -104,8 +104,7 @@ struct nilfs_sc_operations { static void nilfs_segctor_start_timer(struct nilfs_sc_info *); static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); -static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, - int); +static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_gt(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ @@ -182,7 +181,6 @@ int nilfs_transaction_begin(struct super_block *sb, struct nilfs_transaction_info *ti, int vacancy_check) { - struct nilfs_sb_info *sbi; struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); @@ -191,10 +189,9 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; - vfs_check_frozen(sb, SB_FREEZE_WRITE); + sb_start_intwrite(sb); - sbi = NILFS_SB(sb); - nilfs = sbi->s_nilfs; + nilfs = sb->s_fs_info; down_read(&nilfs->ns_segctor_sem); if (vacancy_check && nilfs_near_disk_full(nilfs)) { up_read(&nilfs->ns_segctor_sem); @@ -208,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb, current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return ret; } @@ -225,8 +223,7 @@ int nilfs_transaction_begin(struct super_block *sb, int nilfs_transaction_commit(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; - struct nilfs_sb_info *sbi; - struct nilfs_sc_info *sci; + struct the_nilfs *nilfs = sb->s_fs_info; int err = 0; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); @@ -235,46 +232,47 @@ int nilfs_transaction_commit(struct super_block *sb) ti->ti_count--; return 0; } - sbi = NILFS_SB(sb); - sci = NILFS_SC(sbi); - if (sci != NULL) { + if (nilfs->ns_writer) { + struct nilfs_sc_info *sci = nilfs->ns_writer; + if (ti->ti_flags & NILFS_TI_COMMIT) nilfs_segctor_start_timer(sci); - if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > - sci->sc_watermark) + if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark) nilfs_segctor_do_flush(sci, 0); } - up_read(&sbi->s_nilfs->ns_segctor_sem); + up_read(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_SYNC) err = nilfs_construct_segment(sb); if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return err; } void nilfs_transaction_abort(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); if (ti->ti_count > 0) { ti->ti_count--; return; } - up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); + up_read(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); } void nilfs_relax_pressure_in_lock(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; if (!sci || !sci->sc_flush_request) return; @@ -294,11 +292,13 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) downgrade_write(&nilfs->ns_segctor_sem); } -static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, +static void nilfs_transaction_lock(struct super_block *sb, struct nilfs_transaction_info *ti, int gcflag) { struct nilfs_transaction_info *cur_ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; WARN_ON(cur_ti); ti->ti_flags = NILFS_TI_WRITER; @@ -309,30 +309,31 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, current->journal_info = ti; for (;;) { - down_write(&sbi->s_nilfs->ns_segctor_sem); - if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) + down_write(&nilfs->ns_segctor_sem); + if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) break; - nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); + nilfs_segctor_do_immediate_flush(sci); - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); yield(); } if (gcflag) ti->ti_flags |= NILFS_TI_GC; } -static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) +static void nilfs_transaction_unlock(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); BUG_ON(ti->ti_count > 0); - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(sbi, &ti->ti_garbage, 0); + nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -430,7 +431,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, nilfs_segctor_map_segsum_entry( sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); - if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + if (NILFS_I(inode)->i_root && + !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); /* skip finfo */ } @@ -504,17 +506,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, return err; } -static int nilfs_handle_bmap_error(int err, const char *fname, - struct inode *inode, struct super_block *sb) -{ - if (err == -EINVAL) { - nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; - } - return err; -} - /* * Callback functions that enumerate, mark, and collect dirty blocks */ @@ -524,9 +515,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_v)); @@ -539,13 +529,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - int err; - - err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); - return 0; + return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); } static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, @@ -588,9 +572,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); if (!err) @@ -675,17 +658,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, if (unlikely(page->index > last)) break; - if (mapping->host) { - lock_page(page); - if (!page_has_buffers(page)) - create_empty_buffers(page, - 1 << inode->i_blkbits, 0); - unlock_page(page); - } + lock_page(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + unlock_page(page); bh = head = page_buffers(page); do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -719,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -732,7 +713,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, } } -static void nilfs_dispose_list(struct nilfs_sb_info *sbi, +static void nilfs_dispose_list(struct the_nilfs *nilfs, struct list_head *head, int force) { struct nilfs_inode_info *ii, *n; @@ -740,7 +721,7 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, unsigned nv = 0; while (!list_empty(head)) { - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, head, i_dirty) { list_del_init(&ii->i_dirty); if (force) { @@ -751,14 +732,14 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { set_bit(NILFS_I_QUEUED, &ii->i_state); list_add_tail(&ii->i_dirty, - &sbi->s_dirty_files); + &nilfs->ns_dirty_files); continue; } ivec[nv++] = ii; if (nv == SC_N_INODEVEC) break; } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); for (pii = ivec; nv > 0; pii++, nv--) iput(&(*pii)->vfs_inode); @@ -776,9 +757,8 @@ static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) ret++; - if (ret || nilfs_doing_gc()) - if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) - ret++; + if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat)) + ret++; return ret; } @@ -792,34 +772,33 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci) static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int ret = 0; - if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) + if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); - spin_lock(&sbi->s_inode_lock); - if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) + spin_lock(&nilfs->ns_inode_lock); + if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci)) ret++; - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return ret; } static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; nilfs_mdt_clear_dirty(sci->sc_root->ifile); nilfs_mdt_clear_dirty(nilfs->ns_cpfile); nilfs_mdt_clear_dirty(nilfs->ns_sufile); - nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); + nilfs_mdt_clear_dirty(nilfs->ns_dat); } static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) { - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct buffer_head *bh_cp; struct nilfs_checkpoint *raw_cp; int err; @@ -831,7 +810,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) /* The following code is duplicated with cpfile. But, it is needed to collect the checkpoint even if it was not newly created */ - nilfs_mdt_mark_buffer_dirty(bh_cp); + mark_buffer_dirty(bh_cp); nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); @@ -843,8 +822,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct buffer_head *bh_cp; struct nilfs_checkpoint *raw_cp; int err; @@ -858,9 +836,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); @@ -912,23 +890,26 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; - unsigned isz = nilfs->ns_inode_size; + unsigned isz, srsz; bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + isz = nilfs->ns_inode_size; + srsz = NILFS_SR_BYTES(isz); - raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + + nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + NILFS_SR_DAT_OFFSET(isz), 1); nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + NILFS_SR_CPFILE_OFFSET(isz), 1); nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + NILFS_SR_SUFILE_OFFSET(isz), 1); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); } static void nilfs_redirty_inodes(struct list_head *head) @@ -977,8 +958,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, dispose_buffers: while (!list_empty(listp)) { - bh = list_entry(listp->next, struct buffer_head, - b_assoc_buffers); + bh = list_first_entry(listp, struct buffer_head, + b_assoc_buffers); list_del_init(&bh->b_assoc_buffers); brelse(bh); } @@ -1068,8 +1049,7 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; size_t ndone; @@ -1179,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_DAT: dat_stage: - err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), + err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, &nilfs_sc_dat_ops); if (unlikely(err)) break; @@ -1460,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } @@ -1524,10 +1506,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, nblocks = le32_to_cpu(finfo->fi_nblocks); ndatablk = le32_to_cpu(finfo->fi_ndatablk); - if (buffer_nilfs_node(bh)) - inode = NILFS_BTNC_I(bh->b_page->mapping); - else - inode = NILFS_AS_I(bh->b_page->mapping); + inode = bh->b_page->mapping->host; if (mode == SC_LSEG_DSYNC) sc_op = &nilfs_sc_dsync_ops; @@ -1563,7 +1542,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, return 0; failed_bmap: - err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); return err; } @@ -1581,88 +1559,30 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static int -nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) -{ - struct page *clone_page; - struct buffer_head *bh, *head, *bh2; - void *kaddr; - - bh = head = page_buffers(page); - - clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); - if (unlikely(!clone_page)) - return -ENOMEM; - - bh2 = page_buffers(clone_page); - kaddr = kmap_atomic(page, KM_USER0); - do { - if (list_empty(&bh->b_assoc_buffers)) - continue; - get_bh(bh2); - page_cache_get(clone_page); /* for each bh */ - memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); - bh2->b_blocknr = bh->b_blocknr; - list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); - list_add_tail(&bh->b_assoc_buffers, out); - } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); - kunmap_atomic(kaddr, KM_USER0); - - if (!TestSetPageWriteback(clone_page)) - account_page_writeback(clone_page); - unlock_page(clone_page); - - return 0; -} - -static int nilfs_test_page_to_be_frozen(struct page *page) -{ - struct address_space *mapping = page->mapping; - - if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) - return 0; - - if (page_mapped(page)) { - ClearPageChecked(page); - return 1; - } - return PageChecked(page); -} - -static int nilfs_begin_page_io(struct page *page, struct list_head *out) +static void nilfs_begin_page_io(struct page *page) { if (!page || PageWriteback(page)) /* For split b-tree node pages, this function may be called twice. We ignore the 2nd or later calls by this check. */ - return 0; + return; lock_page(page); clear_page_dirty_for_io(page); set_page_writeback(page); unlock_page(page); - - if (nilfs_test_page_to_be_frozen(page)) { - int err = nilfs_copy_replace_page_buffers(page, out); - if (unlikely(err)) - return err; - } - return 0; } -static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, - struct page **failed_page) +static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; - struct list_head *list = &sci->sc_copied_buffers; - int err; - *failed_page = NULL; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { struct buffer_head *bh; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); @@ -1676,6 +1596,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1687,11 +1608,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, break; } if (bh->b_page != fs_page) { - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) { - *failed_page = fs_page; - goto out; - } + nilfs_begin_page_io(fs_page); fs_page = bh->b_page; } } @@ -1702,11 +1619,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, set_page_writeback(bd_page); unlock_page(bd_page); } - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) - *failed_page = fs_page; - out: - return err; + nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1719,24 +1632,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, return ret; } -static void __nilfs_end_page_io(struct page *page, int err) -{ - if (!err) { - if (!nilfs_page_buffers_clean(page)) - __set_page_dirty_nobuffers(page); - ClearPageError(page); - } else { - __set_page_dirty_nobuffers(page); - SetPageError(page); - } - - if (buffer_nilfs_allocated(page_buffers(page))) { - if (TestClearPageWriteback(page)) - dec_zone_page_state(page, NR_WRITEBACK); - } else - end_page_writeback(page); -} - static void nilfs_end_page_io(struct page *page, int err) { if (!page) @@ -1763,39 +1658,19 @@ static void nilfs_end_page_io(struct page *page, int err) return; } - __nilfs_end_page_io(page, err); -} - -static void nilfs_clear_copied_buffers(struct list_head *list, int err) -{ - struct buffer_head *bh, *head; - struct page *page; - - while (!list_empty(list)) { - bh = list_entry(list->next, struct buffer_head, - b_assoc_buffers); - page = bh->b_page; - page_cache_get(page); - head = bh = page_buffers(page); - do { - if (!list_empty(&bh->b_assoc_buffers)) { - list_del_init(&bh->b_assoc_buffers); - if (!err) { - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - } - brelse(bh); /* for b_assoc_buffers */ - } - } while ((bh = bh->b_this_page) != head); - - __nilfs_end_page_io(page, err); - page_cache_release(page); + if (!err) { + if (!nilfs_page_buffers_clean(page)) + __set_page_dirty_nobuffers(page); + ClearPageError(page); + } else { + __set_page_dirty_nobuffers(page); + SetPageError(page); } + + end_page_writeback(page); } -static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, - int err) +static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; @@ -1807,6 +1682,7 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1816,6 +1692,7 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1825,8 +1702,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, } if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); - if (fs_page && fs_page == failed_page) - return; fs_page = bh->b_page; } } @@ -1845,12 +1720,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, list_splice_tail_init(&sci->sc_write_logs, &logs); ret = nilfs_wait_on_logs(&logs); - nilfs_abort_logs(&logs, NULL, ret ? : err); + nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); if (sci->sc_stage.flags & NILFS_CF_SUFREED) { ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, @@ -1878,7 +1752,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int update_sr = false; list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { @@ -1888,6 +1762,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1909,6 +1784,8 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); + clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_redirected(bh); if (bh == segbuf->sb_super_root) { @@ -1943,8 +1820,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_end_page_io(fs_page, 0); - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); - nilfs_drop_collected_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) @@ -1981,30 +1856,30 @@ static int nilfs_segctor_wait(struct nilfs_sc_info *sci) return ret; } -static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, - struct nilfs_sb_info *sbi) +static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; struct inode *ifile = sci->sc_root->ifile; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); retry: - list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { + list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) { if (!ii->i_bh) { struct buffer_head *ibh; int err; - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block( ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(sci->sc_super, __func__, "failed to get inode block.\n"); return err; } - nilfs_mdt_mark_buffer_dirty(ibh); + mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(ifile); - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; else @@ -2014,21 +1889,20 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); + list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return 0; } -static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, - struct nilfs_sb_info *sbi) +static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || test_bit(NILFS_I_DIRTY, &ii->i_state)) @@ -2037,10 +1911,9 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &ti->ti_garbage); + list_move_tail(&ii->i_dirty, &ti->ti_garbage); } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); } /* @@ -2048,15 +1921,13 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, */ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; - struct page *failed_page; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int err; sci->sc_stage.scnt = NILFS_ST_INIT; sci->sc_cno = nilfs->ns_cno; - err = nilfs_segctor_check_in_files(sci, sbi); + err = nilfs_segctor_collect_dirty_files(sci, nilfs); if (unlikely(err)) goto out; @@ -2105,11 +1976,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - err = nilfs_segctor_prepare_write(sci, &failed_page); - if (err) { - nilfs_abort_logs(&sci->sc_segbufs, failed_page, err); - goto failed_to_write; - } + nilfs_segctor_prepare_write(sci); nilfs_add_checksums_on_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); @@ -2134,7 +2001,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) } while (sci->sc_stage.scnt != NILFS_ST_DONE); out: - nilfs_segctor_check_out_files(sci, sbi); + nilfs_segctor_drop_written_files(sci, nilfs); return err; failed_to_write: @@ -2187,8 +2054,8 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) */ void nilfs_flush_segment(struct super_block *sb, ino_t ino) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; if (!sci || nilfs_doing_construction()) return; @@ -2277,8 +2144,8 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) */ int nilfs_construct_segment(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info *ti; int err; @@ -2315,8 +2182,8 @@ int nilfs_construct_segment(struct super_block *sb) int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, loff_t start, loff_t end) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_inode_info *ii; struct nilfs_transaction_info ti; int err = 0; @@ -2324,33 +2191,33 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, if (!sci) return -EROFS; - nilfs_transaction_lock(sbi, &ti, 0); + nilfs_transaction_lock(sb, &ti, 0); ii = NILFS_I(inode); if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || - nilfs_test_opt(sbi, STRICT_ORDER) || + nilfs_test_opt(nilfs, STRICT_ORDER) || test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || - nilfs_discontinued(sbi->s_nilfs)) { - nilfs_transaction_unlock(sbi); + nilfs_discontinued(nilfs)) { + nilfs_transaction_unlock(sb); err = nilfs_segctor_sync(sci); return err; } - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { - spin_unlock(&sbi->s_inode_lock); - nilfs_transaction_unlock(sbi); + spin_unlock(&nilfs->ns_inode_lock); + nilfs_transaction_unlock(sb); return 0; } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); sci->sc_dsync_inode = ii; sci->sc_dsync_start = start; sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sb); return err; } @@ -2406,8 +2273,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) */ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct nilfs_super_block **sbp; int err = 0; @@ -2425,11 +2291,12 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) nilfs_discontinued(nilfs)) { down_write(&nilfs->ns_sem); err = -EIO; - sbp = nilfs_prepare_super(sbi, + sbp = nilfs_prepare_super(sci->sc_super, nilfs_sb_will_flip(nilfs)); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); - err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); + err = nilfs_commit_super(sci->sc_super, + NILFS_SB_COMMIT); } up_write(&nilfs->ns_sem); } @@ -2454,6 +2321,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } @@ -2461,16 +2330,15 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, void **kbufs) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info ti; int err; if (unlikely(!sci)) return -EROFS; - nilfs_transaction_lock(sbi, &ti, 1); + nilfs_transaction_lock(sb, &ti, 1); err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); if (unlikely(err)) @@ -2498,14 +2366,14 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } - if (nilfs_test_opt(sbi, DISCARD)) { + if (nilfs_test_opt(nilfs, DISCARD)) { int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, sci->sc_nfreesegs); if (ret) { printk(KERN_WARNING "NILFS warning: error %d on discard request, " "turning discards off for the device\n", ret); - nilfs_clear_opt(sbi, DISCARD); + nilfs_clear_opt(nilfs, DISCARD); } } @@ -2513,16 +2381,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, sci->sc_freesegs = NULL; sci->sc_nfreesegs = 0; nilfs_mdt_clear_shadow_map(nilfs->ns_dat); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sb); return err; } static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; struct nilfs_transaction_info ti; - nilfs_transaction_lock(sbi, &ti, 0); + nilfs_transaction_lock(sci->sc_super, &ti, 0); nilfs_segctor_construct(sci, mode); /* @@ -2533,7 +2400,7 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) nilfs_segctor_start_timer(sci); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sci->sc_super); } static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) @@ -2579,7 +2446,7 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; sci->sc_timer.data = (unsigned long)current; @@ -2617,7 +2484,7 @@ static int nilfs_segctor_thread(void *arg) if (freezing(current)) { spin_unlock(&sci->sc_state_lock); - refrigerator(); + try_to_freeze(); spin_lock(&sci->sc_state_lock); } else { DEFINE_WAIT(wait); @@ -2690,17 +2557,17 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) /* * Setup & clean-up functions */ -static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, +static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, struct nilfs_root *root) { + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci; sci = kzalloc(sizeof(*sci), GFP_KERNEL); if (!sci) return NULL; - sci->sc_sbi = sbi; - sci->sc_super = sbi->s_super; + sci->sc_super = sb; nilfs_get_root(root); sci->sc_root = root; @@ -2713,17 +2580,16 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_copied_buffers); init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; - if (sbi->s_interval) - sci->sc_interval = sbi->s_interval; - if (sbi->s_watermark) - sci->sc_watermark = sbi->s_watermark; + if (nilfs->ns_interval) + sci->sc_interval = HZ * nilfs->ns_interval; + if (nilfs->ns_watermark) + sci->sc_watermark = nilfs->ns_watermark; return sci; } @@ -2734,12 +2600,11 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) /* The segctord thread was stopped and its timer was removed. But some tasks remain. */ do { - struct nilfs_sb_info *sbi = sci->sc_sbi; struct nilfs_transaction_info ti; - nilfs_transaction_lock(sbi, &ti, 0); + nilfs_transaction_lock(sci->sc_super, &ti, 0); ret = nilfs_segctor_construct(sci, SC_LSEG_SR); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sci->sc_super); } while (ret && retrycount-- > 0); } @@ -2754,10 +2619,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) */ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int flag; - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); spin_lock(&sci->sc_state_lock); nilfs_segctor_kill_thread(sci); @@ -2768,12 +2633,10 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); - WARN_ON(!list_empty(&sci->sc_copied_buffers)); - if (!list_empty(&sci->sc_dirty_files)) { - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(sci->sc_super, __func__, "dirty file(s) after the final construction\n"); - nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); + nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } WARN_ON(!list_empty(&sci->sc_segbufs)); @@ -2781,79 +2644,78 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_put_root(sci->sc_root); - down_write(&sbi->s_nilfs->ns_segctor_sem); + down_write(&nilfs->ns_segctor_sem); del_timer_sync(&sci->sc_timer); kfree(sci); } /** - * nilfs_attach_segment_constructor - attach a segment constructor - * @sbi: nilfs_sb_info + * nilfs_attach_log_writer - attach log writer + * @sb: super block instance * @root: root object of the current filesystem tree * - * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, - * initializes it, and starts the segment constructor. + * This allocates a log writer object, initializes it, and starts the + * log writer. * * Return Value: On success, 0 is returned. On error, one of the following * negative error code is returned. * * %-ENOMEM - Insufficient memory available. */ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, - struct nilfs_root *root) +int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { + struct the_nilfs *nilfs = sb->s_fs_info; int err; - if (NILFS_SC(sbi)) { + if (nilfs->ns_writer) { /* * This happens if the filesystem was remounted * read/write after nilfs_error degenerated it into a * read-only mount. */ - nilfs_detach_segment_constructor(sbi); + nilfs_detach_log_writer(sb); } - sbi->s_sc_info = nilfs_segctor_new(sbi, root); - if (!sbi->s_sc_info) + nilfs->ns_writer = nilfs_segctor_new(sb, root); + if (!nilfs->ns_writer) return -ENOMEM; - err = nilfs_segctor_start_thread(NILFS_SC(sbi)); + err = nilfs_segctor_start_thread(nilfs->ns_writer); if (err) { - kfree(sbi->s_sc_info); - sbi->s_sc_info = NULL; + kfree(nilfs->ns_writer); + nilfs->ns_writer = NULL; } return err; } /** - * nilfs_detach_segment_constructor - destroy the segment constructor - * @sbi: nilfs_sb_info + * nilfs_detach_log_writer - destroy log writer + * @sb: super block instance * - * nilfs_detach_segment_constructor() kills the segment constructor daemon, - * frees the struct nilfs_sc_info, and destroy the dirty file list. + * This kills log writer daemon, frees the log writer object, and + * destroys list of dirty files. */ -void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) +void nilfs_detach_log_writer(struct super_block *sb) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; LIST_HEAD(garbage_list); down_write(&nilfs->ns_segctor_sem); - if (NILFS_SC(sbi)) { - nilfs_segctor_destroy(NILFS_SC(sbi)); - sbi->s_sc_info = NULL; + if (nilfs->ns_writer) { + nilfs_segctor_destroy(nilfs->ns_writer); + nilfs->ns_writer = NULL; } /* Force to free the list of dirty files */ - spin_lock(&sbi->s_inode_lock); - if (!list_empty(&sbi->s_dirty_files)) { - list_splice_init(&sbi->s_dirty_files, &garbage_list); - nilfs_warning(sbi->s_super, __func__, - "Non empty dirty list after the last " - "segment construction\n"); - } - spin_unlock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); + if (!list_empty(&nilfs->ns_dirty_files)) { + list_splice_init(&nilfs->ns_dirty_files, &garbage_list); + nilfs_warning(sb, __func__, + "Hit dirty file after stopped log writer\n"); + } + spin_unlock(&nilfs->ns_inode_lock); up_write(&nilfs->ns_segctor_sem); - nilfs_dispose_list(sbi, &garbage_list, 1); + nilfs_dispose_list(nilfs, &garbage_list, 1); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index cd8056e7cbe..38a1d001331 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -27,7 +27,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> -#include "sb.h" +#include "nilfs.h" struct nilfs_root; @@ -88,12 +88,10 @@ struct nilfs_segsum_pointer { /** * struct nilfs_sc_info - Segment constructor information * @sc_super: Back pointer to super_block struct - * @sc_sbi: Back pointer to nilfs_sb_info struct * @sc_root: root object of the current filesystem tree * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -131,14 +129,12 @@ struct nilfs_segsum_pointer { */ struct nilfs_sc_info { struct super_block *sc_super; - struct nilfs_sb_info *sc_sbi; struct nilfs_root *sc_root; unsigned long sc_nblk_inc; struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_copied_buffers; __u64 *sc_freesegs; size_t sc_nfreesegs; @@ -235,18 +231,16 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, - struct nilfs_root *root); -extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); +int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root); +void nilfs_detach_log_writer(struct super_block *sb); /* recovery.c */ extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, struct buffer_head **, int); extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_recovery_info *); -extern int nilfs_salvage_orphan_logs(struct the_nilfs *, - struct nilfs_sb_info *, - struct nilfs_recovery_info *); +int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb, + struct nilfs_recovery_info *ri); extern void nilfs_dispose_segment_list(struct list_head *); #endif /* _NILFS_SEGMENT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1d6f488ccae..2a869c35c36 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -30,10 +30,18 @@ #include "mdt.h" #include "sufile.h" - +/** + * struct nilfs_sufile_info - on-memory private data of sufile + * @mi: on-memory private data of metadata file + * @ncleansegs: number of clean segments + * @allocmin: lower limit of allocatable segment range + * @allocmax: upper limit of allocatable segment range + */ struct nilfs_sufile_info { struct nilfs_mdt_info mi; - unsigned long ncleansegs; + unsigned long ncleansegs;/* number of clean segments */ + __u64 allocmin; /* lower limit of allocatable segment range */ + __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) @@ -96,19 +104,26 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile, + __u64 segnum) +{ + return nilfs_mdt_delete_block(sufile, + nilfs_sufile_get_blkoff(sufile, segnum)); +} + static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; void *kaddr; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); } /** @@ -248,6 +263,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, } /** + * nilfs_sufile_set_alloc_range - limit range of segment to be allocated + * @sufile: inode of segment usage file + * @start: minimum segment number of allocatable region (inclusive) + * @end: maximum segment number of allocatable region (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-ERANGE - invalid segment region + */ +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + __u64 nsegs; + int ret = -ERANGE; + + down_write(&NILFS_MDT(sufile)->mi_sem); + nsegs = nilfs_sufile_get_nsegments(sufile); + + if (start <= end && end < nsegs) { + sui->allocmin = start; + sui->allocmax = end; + ret = 0; + } + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file * @segnump: pointer to segment number @@ -269,37 +313,56 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus; - int ret, i, j; + unsigned long nsegments, ncleansegs, nsus, cnt; + int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nsegments = nilfs_sufile_get_nsegments(sufile); + maxsegnum = sui->allocmax; segnum = last_alloc + 1; - maxsegnum = nsegments - 1; - for (i = 0; i < nsegments; i += nsus) { - if (segnum >= nsegments) { - /* wrap around */ - segnum = 0; - maxsegnum = last_alloc; + if (segnum < sui->allocmin || segnum > sui->allocmax) + segnum = sui->allocmin; + + for (cnt = 0; cnt < nsegments; cnt += nsus) { + if (segnum > maxsegnum) { + if (cnt < sui->allocmax - sui->allocmin + 1) { + /* + * wrap around in the limited region. + * if allocation started from + * sui->allocmin, this never happens. + */ + segnum = sui->allocmin; + maxsegnum = last_alloc; + } else if (segnum > sui->allocmin && + sui->allocmax + 1 < nsegments) { + segnum = sui->allocmax + 1; + maxsegnum = nsegments - 1; + } else if (sui->allocmin > 0) { + segnum = 0; + maxsegnum = sui->allocmin - 1; + } else { + break; /* never happens */ + } } ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); @@ -310,25 +373,25 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - NILFS_SUI(sufile)->ncleansegs--; - nilfs_mdt_mark_buffer_dirty(header_bh); - nilfs_mdt_mark_buffer_dirty(su_bh); + sui->ncleansegs--; + mark_buffer_dirty(header_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; goto out_header; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } @@ -350,21 +413,21 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct nilfs_segment_usage *su; void *kaddr; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -376,11 +439,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, void *kaddr; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } clean = nilfs_segment_usage_clean(su); @@ -390,12 +453,12 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -407,12 +470,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, void *kaddr; int sudirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { printk(KERN_WARNING "%s: segment %llu is already clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } WARN_ON(nilfs_segment_usage_error(su)); @@ -420,8 +483,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, sudirty = nilfs_segment_usage_dirty(su); nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh); + kunmap_atomic(kaddr); + mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); NILFS_SUI(sufile)->ncleansegs++; @@ -441,7 +504,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { - nilfs_mdt_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); } @@ -468,15 +531,15 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); WARN_ON(nilfs_segment_usage_error(su)); if (modtime) su->su_lastmod = cpu_to_le64(modtime); su->su_nblocks = cpu_to_le32(nblocks); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); @@ -505,7 +568,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; - struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; int ret; @@ -515,7 +578,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); @@ -525,7 +588,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(header_bh); out_sem: @@ -541,22 +604,194 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, void *kaddr; int suclean; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); NILFS_SUI(sufile)->ncleansegs--; } - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); +} + +/** + * nilfs_sufile_truncate_range - truncate range of segment array + * @sufile: inode of segment usage file + * @start: start segment number (inclusive) + * @end: end segment number (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid number of segments specified + * + * %-EBUSY - Dirty or active segments are present in the range + */ +static int nilfs_sufile_truncate_range(struct inode *sufile, + __u64 start, __u64 end) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su, *su2; + size_t susz = NILFS_MDT(sufile)->mi_entry_size; + unsigned long segusages_per_block; + unsigned long nsegs, ncleaned; + __u64 segnum; + void *kaddr; + ssize_t n, nc; + int ret; + int j; + + nsegs = nilfs_sufile_get_nsegments(sufile); + + ret = -EINVAL; + if (start > end || start >= nsegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); + ncleaned = 0; + + for (segnum = start; segnum <= end; segnum += n) { + n = min_t(unsigned long, + segusages_per_block - + nilfs_sufile_get_offset(sufile, segnum), + end - segnum + 1); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_header; + /* hole */ + continue; + } + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + su2 = su; + for (j = 0; j < n; j++, su = (void *)su + susz) { + if ((le32_to_cpu(su->su_flags) & + ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || + nilfs_segment_is_active(nilfs, segnum + j)) { + ret = -EBUSY; + kunmap_atomic(kaddr); + brelse(su_bh); + goto out_header; + } + } + nc = 0; + for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { + if (nilfs_segment_usage_error(su)) { + nilfs_segment_usage_set_clean(su); + nc++; + } + } + kunmap_atomic(kaddr); + if (nc > 0) { + mark_buffer_dirty(su_bh); + ncleaned += nc; + } + brelse(su_bh); + + if (n == segusages_per_block) { + /* make hole */ + nilfs_sufile_delete_segment_usage_block(sufile, segnum); + } + } + ret = 0; + +out_header: + if (ncleaned > 0) { + NILFS_SUI(sufile)->ncleansegs += ncleaned; + nilfs_sufile_mod_counter(header_bh, ncleaned, 0); + nilfs_mdt_mark_dirty(sufile); + } + brelse(header_bh); +out: + return ret; +} + +/** + * nilfs_sufile_resize - resize segment array + * @sufile: inode of segment usage file + * @newnsegs: new number of segments + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOSPC - Enough free space is not left for shrinking + * + * %-EBUSY - Dirty or active segments exist in the region to be truncated + */ +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + void *kaddr; + unsigned long nsegs, nrsvsegs; + int ret = 0; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nsegs = nilfs_sufile_get_nsegments(sufile); + if (nsegs == newnsegs) + goto out; + + ret = -ENOSPC; + nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs); + if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + if (newnsegs > nsegs) { + sui->ncleansegs += newnsegs - nsegs; + } else /* newnsegs < nsegs */ { + ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1); + if (ret < 0) + goto out_header; + + sui->ncleansegs -= nsegs - newnsegs; + } + + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); + header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); + kunmap_atomic(kaddr); + + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); + nilfs_set_nsegments(nilfs, newnsegs); + +out_header: + brelse(header_bh); +out: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; } /** @@ -583,7 +818,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; - struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -611,7 +846,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); for (j = 0; j < n; @@ -624,7 +859,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, si->sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } ret = nsegs; @@ -635,6 +870,289 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, } /** + * nilfs_sufile_set_suinfo - sets segment usage info + * @sufile: inode of segment usage file + * @buf: array of suinfo_update + * @supsz: byte size of suinfo_update + * @nsup: size of suinfo_update array + * + * Description: Takes an array of nilfs_suinfo_update structs and updates + * segment usage accordingly. Only the fields indicated by the sup_flags + * are updated. + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + */ +ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, + unsigned supsz, size_t nsup) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh, *bh; + struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; + struct nilfs_segment_usage *su; + void *kaddr; + unsigned long blkoff, prev_blkoff; + int cleansi, cleansu, dirtysi, dirtysu; + long ncleaned = 0, ndirtied = 0; + int ret = 0; + + if (unlikely(nsup == 0)) + return ret; + + for (sup = buf; sup < supend; sup = (void *)sup + supsz) { + if (sup->sup_segnum >= nilfs->ns_nsegments + || (sup->sup_flags & + (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) + || (nilfs_suinfo_update_nblocks(sup) && + sup->sup_sui.sui_nblocks > + nilfs->ns_blocks_per_segment)) + return -EINVAL; + } + + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + sup = buf; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, sup->sup_segnum, bh, kaddr); + + if (nilfs_suinfo_update_lastmod(sup)) + su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); + + if (nilfs_suinfo_update_nblocks(sup)) + su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); + + if (nilfs_suinfo_update_flags(sup)) { + /* + * Active flag is a virtual flag projected by running + * nilfs kernel code - drop it not to write it to + * disk. + */ + sup->sup_sui.sui_flags &= + ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); + + cleansi = nilfs_suinfo_clean(&sup->sup_sui); + cleansu = nilfs_segment_usage_clean(su); + dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); + dirtysu = nilfs_segment_usage_dirty(su); + + if (cleansi && !cleansu) + ++ncleaned; + else if (!cleansi && cleansu) + --ncleaned; + + if (dirtysi && !dirtysu) + ++ndirtied; + else if (!dirtysi && dirtysu) + --ndirtied; + + su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); + } + + kunmap_atomic(kaddr); + + sup = (void *)sup + supsz; + if (sup >= supend) + break; + + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + mark_buffer_dirty(bh); + put_bh(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (unlikely(ret < 0)) + goto out_mark; + } + mark_buffer_dirty(bh); + put_bh(bh); + + out_mark: + if (ncleaned || ndirtied) { + nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, + (u64)ndirtied); + NILFS_SUI(sufile)->ncleansegs += ncleaned; + } + nilfs_mdt_mark_dirty(sufile); + out_header: + put_bh(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_trim_fs() - trim ioctl handle function + * @sufile: inode of segment usage file + * @range: fstrim_range structure + * + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + * + * Decription: nilfs_sufile_trim_fs goes through all segments containing bytes + * from start to start+len. start is rounded up to the next block boundary + * and start+len is rounded down. For each clean segment blkdev_issue_discard + * function is invoked. + * + * Return Value: On success, 0 is returned or negative error code, otherwise. + */ +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su; + void *kaddr; + size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; + sector_t seg_start, seg_end, start_block, end_block; + sector_t start = 0, nblocks = 0; + u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; + int ret = 0; + unsigned int sects_per_block; + + sects_per_block = (1 << nilfs->ns_blocksize_bits) / + bdev_logical_block_size(nilfs->ns_bdev); + len = range->len >> nilfs->ns_blocksize_bits; + minlen = range->minlen >> nilfs->ns_blocksize_bits; + max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); + + if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) + return -EINVAL; + + start_block = (range->start + nilfs->ns_blocksize - 1) >> + nilfs->ns_blocksize_bits; + + /* + * range->len can be very large (actually, it is set to + * ULLONG_MAX by default) - truncate upper end of the range + * carefully so as not to overflow. + */ + if (max_blocks - start_block < len) + end_block = max_blocks - 1; + else + end_block = start_block + len - 1; + + segnum = nilfs_get_segnum_of_block(nilfs, start_block); + segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); + + down_read(&NILFS_MDT(sufile)->mi_sem); + + while (segnum <= segnum_end) { + n = nilfs_sufile_segment_usages_in_block(sufile, segnum, + segnum_end); + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_sem; + /* hole */ + segnum += n; + continue; + } + + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, + su_bh, kaddr); + for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { + if (!nilfs_segment_usage_clean(su)) + continue; + + nilfs_get_segment_range(nilfs, segnum, &seg_start, + &seg_end); + + if (!nblocks) { + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + continue; + } + + if (start + nblocks == seg_start) { + /* add to previous extent */ + nblocks += seg_end - seg_start + 1; + continue; + } + + /* discard previous extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + + if (nblocks >= minlen) { + kunmap_atomic(kaddr); + + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + if (ret < 0) { + put_bh(su_bh); + goto out_sem; + } + + ndiscarded += nblocks; + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + } + + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + } + kunmap_atomic(kaddr); + put_bh(su_bh); + } + + + if (nblocks) { + /* discard last extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + if (start + nblocks > end_block + 1) + nblocks = end_block - start + 1; + + if (nblocks >= minlen) { + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + if (!ret) + ndiscarded += nblocks; + } + } + +out_sem: + up_read(&NILFS_MDT(sufile)->mi_sem); + + range->len = ndiscarded << nilfs->ns_blocksize_bits; + return ret; +} + +/** * nilfs_sufile_read - read or get sufile inode * @sb: super block instance * @susize: size of a segment usage entry @@ -651,6 +1169,18 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, void *kaddr; int err; + if (susize > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large segment usage size: %zu bytes.\n", + susize); + return -EINVAL; + } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { + printk(KERN_ERR + "NILFS: too small segment usage size: %zu bytes.\n", + susize); + return -EINVAL; + } + sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); if (unlikely(!sufile)) return -ENOMEM; @@ -673,12 +1203,15 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, goto failed; sui = NILFS_SUI(sufile); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(header_bh); + sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; + sui->allocmin = 0; + unlock_new_inode(sufile); out: *inodep = sufile; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a943fbacb45..b8afd72f237 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -31,11 +31,12 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) { - return NILFS_I_NILFS(sufile)->ns_nsegments; + return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments; } unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, @@ -43,6 +44,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); +ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned , size_t); int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, void (*dofunc)(struct inode *, __u64, @@ -61,8 +63,10 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep); +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); /** * nilfs_sufile_scrap - make a segment garbage diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d..8c532b2ca3a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -43,11 +43,9 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> -#include <linux/random.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> -#include <linux/kobject.h> #include <linux/seq_file.h> #include <linux/mount.h> #include "nilfs.h" @@ -58,6 +56,7 @@ #include "btnode.h" #include "page.h" #include "cpfile.h" +#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" @@ -73,23 +72,23 @@ struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; -static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); +static int nilfs_setup_super(struct super_block *sb, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -static void nilfs_set_error(struct nilfs_sb_info *sbi) +static void nilfs_set_error(struct super_block *sb) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; - sbp = nilfs_prepare_super(sbi, 0); + sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); if (sbp[1]) sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); - nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); + nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } } up_write(&nilfs->ns_sem); @@ -110,25 +109,30 @@ static void nilfs_set_error(struct nilfs_sb_info *sbi) void nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); if (!(sb->s_flags & MS_RDONLY)) { - nilfs_set_error(sbi); + nilfs_set_error(sb); - if (nilfs_test_opt(sbi, ERRORS_RO)) { + if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } } - if (nilfs_test_opt(sbi, ERRORS_PANIC)) + if (nilfs_test_opt(nilfs, ERRORS_PANIC)) panic("NILFS (device %s): panic forced after error\n", sb->s_id); } @@ -136,13 +140,17 @@ void nilfs_error(struct super_block *sb, const char *function, void nilfs_warning(struct super_block *sb, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_WARNING "NILFS warning (device %s): %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); } @@ -158,12 +166,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_state = 0; ii->i_cno = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); + nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); return &ii->vfs_inode; } -void nilfs_destroy_inode(struct inode *inode) +static void nilfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (mdi) { @@ -173,14 +182,19 @@ void nilfs_destroy_inode(struct inode *inode) kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } -static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) +void nilfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nilfs_i_callback); +} + +static int nilfs_sync_super(struct super_block *sb, int flag) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; int err; retry: set_buffer_dirty(nilfs->ns_sbh[0]); - if (nilfs_test_opt(sbi, BARRIER)) { + if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], WRITE_SYNC | WRITE_FLUSH_FUA); } else { @@ -247,10 +261,10 @@ void nilfs_set_log_cursor(struct nilfs_super_block *sbp, spin_unlock(&nilfs->ns_last_segment_lock); } -struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, +struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, int flip) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; /* nilfs->ns_sem must be locked by the caller. */ @@ -260,7 +274,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", - sbi->s_super->s_id); + sb->s_id); return NULL; } } else if (sbp[1] && @@ -274,9 +288,9 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, return sbp; } -int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) +int nilfs_commit_super(struct super_block *sb, int flag) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; time_t t; @@ -296,27 +310,28 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); - return nilfs_sync_super(sbi, flag); + return nilfs_sync_super(sb, flag); } /** * nilfs_cleanup_super() - write filesystem state for cleanup - * @sbi: nilfs_sb_info to be unmounted or degraded to read-only + * @sb: super block instance to be unmounted or degraded to read-only * * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. */ -int nilfs_cleanup_super(struct nilfs_sb_info *sbi) +int nilfs_cleanup_super(struct super_block *sb) { + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int flag = NILFS_SB_COMMIT; int ret = -EIO; - sbp = nilfs_prepare_super(sbi, 0); + sbp = nilfs_prepare_super(sb, 0); if (sbp) { - sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); - nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); + sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + nilfs_set_log_cursor(sbp[0], nilfs); if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { /* * make the "clean" flag also to the opposite @@ -326,21 +341,148 @@ int nilfs_cleanup_super(struct nilfs_sb_info *sbi) sbp[1]->s_state = sbp[0]->s_state; flag = NILFS_SB_COMMIT_ALL; } - ret = nilfs_commit_super(sbi, flag); + ret = nilfs_commit_super(sb, flag); + } + return ret; +} + +/** + * nilfs_move_2nd_super - relocate secondary super block + * @sb: super block instance + * @sb2off: new offset of the secondary super block (in bytes) + */ +static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct buffer_head *nsbh; + struct nilfs_super_block *nsbp; + sector_t blocknr, newblocknr; + unsigned long offset; + int sb2i = -1; /* array index of the secondary superblock */ + int ret = 0; + + /* nilfs->ns_sem must be locked by the caller. */ + if (nilfs->ns_sbh[1] && + nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 1; + blocknr = nilfs->ns_sbh[1]->b_blocknr; + } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 0; + blocknr = nilfs->ns_sbh[0]->b_blocknr; + } + if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) + goto out; /* super block location is unchanged */ + + /* Get new super block buffer */ + newblocknr = sb2off >> nilfs->ns_blocksize_bits; + offset = sb2off & (nilfs->ns_blocksize - 1); + nsbh = sb_getblk(sb, newblocknr); + if (!nsbh) { + printk(KERN_WARNING + "NILFS warning: unable to move secondary superblock " + "to block %llu\n", (unsigned long long)newblocknr); + ret = -EIO; + goto out; + } + nsbp = (void *)nsbh->b_data + offset; + memset(nsbp, 0, nilfs->ns_blocksize); + + if (sb2i >= 0) { + memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + brelse(nilfs->ns_sbh[sb2i]); + nilfs->ns_sbh[sb2i] = nsbh; + nilfs->ns_sbp[sb2i] = nsbp; + } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { + /* secondary super block will be restored to index 1 */ + nilfs->ns_sbh[1] = nsbh; + nilfs->ns_sbp[1] = nsbp; + } else { + brelse(nsbh); + } +out: + return ret; +} + +/** + * nilfs_resize_fs - resize the filesystem + * @sb: super block instance + * @newsize: new size of the filesystem (in bytes) + */ +int nilfs_resize_fs(struct super_block *sb, __u64 newsize) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + __u64 devsize, newnsegs; + loff_t sb2off; + int ret; + + ret = -ERANGE; + devsize = i_size_read(sb->s_bdev->bd_inode); + if (newsize > devsize) + goto out; + + /* + * Write lock is required to protect some functions depending + * on the number of segments, the number of reserved segments, + * and so forth. + */ + down_write(&nilfs->ns_segctor_sem); + + sb2off = NILFS_SB2_OFFSET_BYTES(newsize); + newnsegs = sb2off >> nilfs->ns_blocksize_bits; + do_div(newnsegs, nilfs->ns_blocks_per_segment); + + ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); + up_write(&nilfs->ns_segctor_sem); + if (ret < 0) + goto out; + + ret = nilfs_construct_segment(sb); + if (ret < 0) + goto out; + + down_write(&nilfs->ns_sem); + nilfs_move_2nd_super(sb, sb2off); + ret = -EIO; + sbp = nilfs_prepare_super(sb, 0); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + /* + * Drop NILFS_RESIZE_FS flag for compatibility with + * mount-time resize which may be implemented in a + * future release. + */ + sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & + ~NILFS_RESIZE_FS); + sbp[0]->s_dev_size = cpu_to_le64(newsize); + sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } + up_write(&nilfs->ns_sem); + + /* + * Reset the range of allocatable segments last. This order + * is important in the case of expansion because the secondary + * superblock must be protected from log write until migration + * completes. + */ + if (!ret) + nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); +out: return ret; } static void nilfs_put_super(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; - nilfs_detach_segment_constructor(sbi); + nilfs_detach_log_writer(sb); if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs_cleanup_super(sbi); + nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } @@ -349,15 +491,12 @@ static void nilfs_put_super(struct super_block *sb) iput(nilfs->ns_dat); destroy_nilfs(nilfs); - sbi->s_super = NULL; sb->s_fs_info = NULL; - kfree(sbi); } static int nilfs_sync_fs(struct super_block *sb, int wait) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int err = 0; @@ -367,10 +506,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) down_write(&nilfs->ns_sem); if (nilfs_sb_dirty(nilfs)) { - sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); + sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs)); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); - nilfs_commit_super(sbi, NILFS_SB_COMMIT); + nilfs_commit_super(sb, NILFS_SB_COMMIT); } } up_write(&nilfs->ns_sem); @@ -378,10 +517,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) return err; } -int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, +int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **rootp) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; struct nilfs_checkpoint *raw_cp; struct buffer_head *bh_cp; @@ -410,13 +549,15 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, goto failed; } - err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, + err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size, &raw_cp->cp_ifile_inode, &root->ifile); if (err) goto failed_bh; - atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + atomic64_set(&root->inodes_count, + le64_to_cpu(raw_cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, + le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); @@ -434,8 +575,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, static int nilfs_freeze(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; int err; if (sb->s_flags & MS_RDONLY) @@ -443,21 +583,20 @@ static int nilfs_freeze(struct super_block *sb) /* Mark super block clean */ down_write(&nilfs->ns_sem); - err = nilfs_cleanup_super(sbi); + err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); return err; } static int nilfs_unfreeze(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; if (sb->s_flags & MS_RDONLY) return 0; down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi, false); + nilfs_setup_super(sb, false); up_write(&nilfs->ns_sem); return 0; } @@ -472,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; + u64 nmaxinodes, nfreeinodes; int err; /* @@ -496,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (unlikely(err)) return err; + err = nilfs_ifile_count_free_inodes(root->ifile, + &nmaxinodes, &nfreeinodes); + if (unlikely(err)) { + printk(KERN_WARNING + "NILFS warning: fail to count free inodes: err %d.\n", + err); + if (err == -ERANGE) { + /* + * If nilfs_palloc_count_max_entries() returns + * -ERANGE error code then we simply treat + * curent inodes count as maximum possible and + * zero as free inodes value. + */ + nmaxinodes = atomic64_read(&root->inodes_count); + nfreeinodes = 0; + err = 0; + } else + return err; + } + buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&root->inodes_count); - buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ + buf->f_files = nmaxinodes; + buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); @@ -511,25 +671,25 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) +static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) { - struct super_block *sb = vfs->mnt_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; + struct super_block *sb = dentry->d_sb; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; - if (!nilfs_test_opt(sbi, BARRIER)) + if (!nilfs_test_opt(nilfs, BARRIER)) seq_puts(seq, ",nobarrier"); if (root->cno != NILFS_CPTREE_CURRENT_CNO) seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); - if (nilfs_test_opt(sbi, ERRORS_PANIC)) + if (nilfs_test_opt(nilfs, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); - if (nilfs_test_opt(sbi, ERRORS_CONT)) + if (nilfs_test_opt(nilfs, ERRORS_CONT)) seq_puts(seq, ",errors=continue"); - if (nilfs_test_opt(sbi, STRICT_ORDER)) + if (nilfs_test_opt(nilfs, STRICT_ORDER)) seq_puts(seq, ",order=strict"); - if (nilfs_test_opt(sbi, NORECOVERY)) + if (nilfs_test_opt(nilfs, NORECOVERY)) seq_puts(seq, ",norecovery"); - if (nilfs_test_opt(sbi, DISCARD)) + if (nilfs_test_opt(nilfs, DISCARD)) seq_puts(seq, ",discard"); return 0; @@ -539,20 +699,13 @@ static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, - /* .write_inode = nilfs_write_inode, */ - /* .put_inode = nilfs_put_inode, */ - /* .drop_inode = nilfs_drop_inode, */ .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, - /* .write_super = nilfs_write_super, */ .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, - /* .write_super_lockfs */ - /* .unlockfs */ .statfs = nilfs_statfs, .remount_fs = nilfs_remount, - /* .umount_begin */ .show_options = nilfs_show_options }; @@ -578,7 +731,7 @@ static match_table_t tokens = { static int parse_options(char *options, struct super_block *sb, int is_remount) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; char *p; substring_t args[MAX_OPT_ARGS]; @@ -593,29 +746,29 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) token = match_token(p, tokens, args); switch (token) { case Opt_barrier: - nilfs_set_opt(sbi, BARRIER); + nilfs_set_opt(nilfs, BARRIER); break; case Opt_nobarrier: - nilfs_clear_opt(sbi, BARRIER); + nilfs_clear_opt(nilfs, BARRIER); break; case Opt_order: if (strcmp(args[0].from, "relaxed") == 0) /* Ordered data semantics */ - nilfs_clear_opt(sbi, STRICT_ORDER); + nilfs_clear_opt(nilfs, STRICT_ORDER); else if (strcmp(args[0].from, "strict") == 0) /* Strict in-order semantics */ - nilfs_set_opt(sbi, STRICT_ORDER); + nilfs_set_opt(nilfs, STRICT_ORDER); else return 0; break; case Opt_err_panic: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); break; case Opt_err_ro: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); break; case Opt_err_cont: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); break; case Opt_snapshot: if (is_remount) { @@ -626,13 +779,13 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) } break; case Opt_norecovery: - nilfs_set_opt(sbi, NORECOVERY); + nilfs_set_opt(nilfs, NORECOVERY); break; case Opt_discard: - nilfs_set_opt(sbi, DISCARD); + nilfs_set_opt(nilfs, DISCARD); break; case Opt_nodiscard: - nilfs_clear_opt(sbi, DISCARD); + nilfs_clear_opt(nilfs, DISCARD); break; default: printk(KERN_ERR @@ -644,22 +797,24 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) } static inline void -nilfs_set_default_options(struct nilfs_sb_info *sbi, +nilfs_set_default_options(struct super_block *sb, struct nilfs_super_block *sbp) { - sbi->s_mount_opt = + struct the_nilfs *nilfs = sb->s_fs_info; + + nilfs->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; } -static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) +static int nilfs_setup_super(struct super_block *sb, int is_mount) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int max_mnt_count; int mnt_count; /* nilfs->ns_sem must be locked by the caller. */ - sbp = nilfs_prepare_super(sbi, 0); + sbp = nilfs_prepare_super(sb, 0); if (!sbp) return -EIO; @@ -688,8 +843,9 @@ skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); - return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, @@ -710,7 +866,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, struct nilfs_super_block *sbp, char *data) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; sb->s_magic = le16_to_cpu(sbp->s_magic); @@ -719,12 +875,12 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= MS_NOATIME; #endif - nilfs_set_default_options(sbi, sbp); + nilfs_set_default_options(sb, sbp); - sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); - sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); - sbi->s_interval = le32_to_cpu(sbp->s_c_interval); - sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); + nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); + nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); + nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); + nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); return !parse_options(data, sb, 0) ? -EINVAL : 0 ; } @@ -777,9 +933,8 @@ static int nilfs_get_root_dentry(struct super_block *sb, if (root->cno == NILFS_CPTREE_CURRENT_CNO) { dentry = d_find_alias(inode); if (!dentry) { - dentry = d_alloc_root(inode); + dentry = d_make_root(inode); if (!dentry) { - iput(inode); ret = -ENOMEM; goto failed_dentry; } @@ -805,10 +960,12 @@ static int nilfs_get_root_dentry(struct super_block *sb, static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct dentry **root_dentry) { - struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; + struct the_nilfs *nilfs = s->s_fs_info; struct nilfs_root *root; int ret; + mutex_lock(&nilfs->ns_snapshot_mount_mutex); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); @@ -823,7 +980,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, goto out; } - ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); + ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { printk(KERN_ERR "NILFS: error loading snapshot " "(checkpoint number=%llu).\n", @@ -833,31 +990,25 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } -static int nilfs_tree_was_touched(struct dentry *root_dentry) -{ - return atomic_read(&root_dentry->d_count) > 1; -} - /** - * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint + * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * * This function returns true if the tree was in-use. */ -static int nilfs_try_to_shrink_tree(struct dentry *root_dentry) +static bool nilfs_tree_is_busy(struct dentry *root_dentry) { - if (have_submounts(root_dentry)) - return true; shrink_dcache_parent(root_dentry); - return nilfs_tree_was_touched(root_dentry); + return d_count(root_dentry) > 1; } int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) { - struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; struct inode *inode; struct dentry *dentry; @@ -870,14 +1021,13 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) return true; /* protect recent checkpoints */ ret = false; - root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + root = nilfs_lookup_root(nilfs, cno); if (root) { inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); if (inode) { dentry = d_find_alias(inode); if (dentry) { - if (nilfs_tree_was_touched(dentry)) - ret = nilfs_try_to_shrink_tree(dentry); + ret = nilfs_tree_is_busy(dentry); dput(dentry); } iput(inode); @@ -900,57 +1050,36 @@ static int nilfs_fill_super(struct super_block *sb, void *data, int silent) { struct the_nilfs *nilfs; - struct nilfs_sb_info *sbi; struct nilfs_root *fsroot; struct backing_dev_info *bdi; __u64 cno; int err; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) + nilfs = alloc_nilfs(sb->s_bdev); + if (!nilfs) return -ENOMEM; - sb->s_fs_info = sbi; - sbi->s_super = sb; + sb->s_fs_info = nilfs; - nilfs = alloc_nilfs(sb->s_bdev); - if (!nilfs) { - err = -ENOMEM; - goto failed_sbi; - } - sbi->s_nilfs = nilfs; - - err = init_nilfs(nilfs, sbi, (char *)data); + err = init_nilfs(nilfs, sb, (char *)data); if (err) goto failed_nilfs; - spin_lock_init(&sbi->s_inode_lock); - INIT_LIST_HEAD(&sbi->s_dirty_files); - - /* - * Following initialization is overlapped because - * nilfs_sb_info structure has been cleared at the beginning. - * But we reserve them to keep our interest and make ready - * for the future change. - */ - get_random_bytes(&sbi->s_next_generation, - sizeof(sbi->s_next_generation)); - spin_lock_init(&sbi->s_next_gen_lock); - sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; + sb->s_max_links = NILFS_LINK_MAX; bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; sb->s_bdi = bdi ? : &default_backing_dev_info; - err = load_nilfs(nilfs, sbi); + err = load_nilfs(nilfs, sb); if (err) goto failed_nilfs; cno = nilfs_last_cno(nilfs); - err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); + err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { printk(KERN_ERR "NILFS: error loading last checkpoint " "(checkpoint number=%llu).\n", (unsigned long long)cno); @@ -958,7 +1087,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi, fsroot); + err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; } @@ -971,14 +1100,14 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi, true); + nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } return 0; failed_segctor: - nilfs_detach_segment_constructor(sbi); + nilfs_detach_log_writer(sb); failed_checkpoint: nilfs_put_root(fsroot); @@ -990,23 +1119,19 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) failed_nilfs: destroy_nilfs(nilfs); - - failed_sbi: - sb->s_fs_info = NULL; - kfree(sbi); return err; } static int nilfs_remount(struct super_block *sb, int *flags, char *data) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; unsigned long old_sb_flags; - struct nilfs_mount_options old_opts; + unsigned long old_mount_opt; int err; + sync_filesystem(sb); old_sb_flags = sb->s_flags; - old_opts.mount_opt = sbi->s_mount_opt; + old_mount_opt = nilfs->ns_mount_opt; if (!parse_options(data, sb, 1)) { err = -EINVAL; @@ -1026,8 +1151,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) goto out; if (*flags & MS_RDONLY) { - /* Shutting down the segment constructor */ - nilfs_detach_segment_constructor(sbi); + /* Shutting down log writer */ + nilfs_detach_log_writer(sb); sb->s_flags |= MS_RDONLY; /* @@ -1035,7 +1160,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); - nilfs_cleanup_super(sbi); + nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } else { __u64 features; @@ -1062,12 +1187,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; root = NILFS_I(sb->s_root->d_inode)->i_root; - err = nilfs_attach_segment_constructor(sbi, root); + err = nilfs_attach_log_writer(sb, root); if (err) goto restore_opts; down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi, true); + nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: @@ -1075,13 +1200,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) restore_opts: sb->s_flags = old_sb_flags; - sbi->s_mount_opt = old_opts.mount_opt; + nilfs->ns_mount_opt = old_mount_opt; return err; } struct nilfs_super_data { struct block_device *bdev; - struct nilfs_sb_info *sbi; __u64 cno; int flags; }; @@ -1147,14 +1271,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; struct dentry *root_dentry; int err, s_new = false; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); @@ -1176,7 +1300,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags, err = -EBUSY; goto failed; } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, + sd.bdev); mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); @@ -1189,7 +1314,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); @@ -1200,11 +1324,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } else if (!sd.cno) { - int busy = false; - - if (nilfs_tree_was_touched(s->s_root)) { - busy = nilfs_try_to_shrink_tree(s->s_root); - if (busy && (flags ^ s->s_flags) & MS_RDONLY) { + if (nilfs_tree_is_busy(s->s_root)) { + if ((flags ^ s->s_flags) & MS_RDONLY) { printk(KERN_ERR "NILFS: the device already " "has a %s mount.\n", (s->s_flags & MS_RDONLY) ? @@ -1212,8 +1333,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, err = -EBUSY; goto failed_super; } - } - if (!busy) { + } else { /* * Try remount to setup mount states if the current * tree is not mounted and only snapshots use this sb. @@ -1233,7 +1353,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode); return root_dentry; @@ -1242,7 +1362,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode); return ERR_PTR(err); } @@ -1253,6 +1373,7 @@ struct file_system_type nilfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { @@ -1262,7 +1383,7 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - nilfs_btnode_cache_init_once(&ii->i_btnode_cache); + address_space_init_once(&ii->i_btnode_cache); ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } @@ -1274,6 +1395,12 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); if (nilfs_transaction_cachep) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 0254be2d73c..8ba8229ba07 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/random.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" @@ -75,7 +76,11 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_bdev = bdev; atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_snapshot_mount_mutex); + INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); + spin_lock_init(&nilfs->ns_inode_lock); + spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -197,16 +202,16 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, /** * load_nilfs - load and recover the nilfs * @nilfs: the_nilfs structure to be released - * @sbi: nilfs_sb_info used to recover past segment + * @sb: super block isntance used to recover past segment * * load_nilfs() searches and load the latest super root, * attaches the last segment, and does recovery if needed. * The caller must call this exclusively for simultaneous mounts. */ -int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_recovery_info ri; - unsigned int s_flags = sbi->s_super->s_flags; + unsigned int s_flags = sb->s_flags; int really_read_only = bdev_read_only(nilfs->ns_bdev); int valid_fs = nilfs_valid_fs(nilfs); int err; @@ -271,7 +276,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto scan_error; } - err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); + err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading super root.\n"); goto failed; @@ -283,7 +288,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) if (s_flags & MS_RDONLY) { __u64 features; - if (nilfs_test_opt(sbi, NORECOVERY)) { + if (nilfs_test_opt(nilfs, NORECOVERY)) { printk(KERN_INFO "NILFS: norecovery option specified. " "skipping roll-forward recovery\n"); goto skip_recovery; @@ -304,21 +309,21 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) err = -EROFS; goto failed_unload; } - sbi->s_super->s_flags &= ~MS_RDONLY; - } else if (nilfs_test_opt(sbi, NORECOVERY)) { + sb->s_flags &= ~MS_RDONLY; + } else if (nilfs_test_opt(nilfs, NORECOVERY)) { printk(KERN_ERR "NILFS: recovery cancelled because norecovery " "option was specified for a read/write mount\n"); err = -EINVAL; goto failed_unload; } - err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); + err = nilfs_salvage_orphan_logs(nilfs, sb, &ri); if (err) goto failed_unload; down_write(&nilfs->ns_sem); nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ - err = nilfs_cleanup_super(sbi); + err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); if (err) { @@ -329,9 +334,8 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) printk(KERN_INFO "NILFS: recovery complete.\n"); skip_recovery: - set_nilfs_loaded(nilfs); nilfs_clear_recovery_info(&ri); - sbi->s_super->s_flags = s_flags; + sb->s_flags = s_flags; return 0; scan_error: @@ -345,7 +349,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) failed: nilfs_clear_recovery_info(&ri); - sbi->s_super->s_flags = s_flags; + sb->s_flags = s_flags; return err; } @@ -360,6 +364,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) return res; } +/** + * nilfs_nrsvsegs - calculate the number of reserved segments + * @nilfs: nilfs object + * @nsegs: total number of segments + */ +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) +{ + return max_t(unsigned long, NILFS_MIN_NRSVSEGS, + DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage, + 100)); +} + +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) +{ + nilfs->ns_nsegments = nsegs; + nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs); +} + static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { @@ -377,6 +399,16 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); + if (nilfs->ns_inode_size > nilfs->ns_blocksize) { + printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n", + nilfs->ns_inode_size); + return -EINVAL; + } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { + printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n", + nilfs->ns_inode_size); + return -EINVAL; + } + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); @@ -386,13 +418,15 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); - nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments); nilfs->ns_r_segments_percentage = le32_to_cpu(sbp->s_r_segments_percentage); - nilfs->ns_nrsvsegs = - max_t(unsigned long, NILFS_MIN_NRSVSEGS, - DIV_ROUND_UP(nilfs->ns_nsegments * - nilfs->ns_r_segments_percentage, 100)); + if (nilfs->ns_r_segments_percentage < 1 || + nilfs->ns_r_segments_percentage > 99) { + printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n"); + return -EINVAL; + } + + nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } @@ -476,10 +510,13 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EIO; } printk(KERN_WARNING - "NILFS warning: unable to read primary superblock\n"); - } else if (!sbp[1]) + "NILFS warning: unable to read primary superblock " + "(blocksize = %d)\n", blocksize); + } else if (!sbp[1]) { printk(KERN_WARNING - "NILFS warning: unable to read secondary superblock\n"); + "NILFS warning: unable to read secondary superblock " + "(blocksize = %d)\n", blocksize); + } /* * Compare two super blocks and set 1 in swp if the secondary @@ -495,6 +532,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, brelse(sbh[1]); sbh[1] = NULL; sbp[1] = NULL; + valid[1] = 0; swp = 0; } if (!valid[swp]) { @@ -506,7 +544,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " - "using spare superblock.\n"); + "using spare superblock (blocksize = %d).\n", blocksize); if (swp) nilfs_swap_super_block(nilfs); @@ -520,7 +558,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, /** * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure - * @sbi: nilfs_sb_info * @sb: super block * @data: mount options * @@ -531,9 +568,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * Return Value: On success, 0 is returned. On error, a negative error * code is returned. */ -int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) { - struct super_block *sb = sbi->s_super; struct nilfs_super_block *sbp; int blocksize; int err; @@ -589,6 +625,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; + get_random_bytes(&nilfs->ns_next_generation, + sizeof(nilfs->ns_next_generation)); + err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; @@ -651,12 +690,11 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { - struct inode *dat = nilfs_dat_inode(nilfs); unsigned long ncleansegs; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } @@ -736,8 +774,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) new->ifile = NULL; new->nilfs = nilfs; atomic_set(&new->count, 1); - atomic_set(&new->inodes_count, 0); - atomic_set(&new->blocks_count, 0); + atomic64_set(&new->inodes_count, 0); + atomic64_set(&new->blocks_count, 0); rb_link_node(&new->rb_node, parent, p); rb_insert_color(&new->rb_node, &nilfs->ns_cptree); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 69226e14b74..de8cc53b4a5 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -31,13 +31,12 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/slab.h> -#include "sb.h" + +struct nilfs_sc_info; /* the_nilfs struct */ enum { THE_NILFS_INIT = 0, /* Information from super_block is set */ - THE_NILFS_LOADED, /* Roll-back/roll-forward has done and - the latest checkpoint was loaded */ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ @@ -48,11 +47,13 @@ enum { * @ns_flags: flags * @ns_bdev: block device * @ns_sem: semaphore for shared states + * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block + * @ns_mount_state: file system state * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next @@ -67,13 +68,23 @@ enum { * @ns_last_cno: checkpoint number of the latest segment * @ns_prot_seq: least sequence number of segments which must not be reclaimed * @ns_prev_seq: base sequence number used to decide if advance log cursor - * @ns_segctor_sem: segment constructor semaphore + * @ns_writer: log writer + * @ns_segctor_sem: semaphore protecting log write * @ns_dat: DAT file inode * @ns_cpfile: checkpoint file inode * @ns_sufile: segusage file inode * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) * @ns_cptree_lock: lock protecting @ns_cptree + * @ns_dirty_files: list of dirty files + * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks + * @ns_next_generation: next generation number for inodes + * @ns_next_gen_lock: lock protecting @ns_next_generation + * @ns_mount_opt: mount options + * @ns_resuid: uid for reserved blocks + * @ns_resgid: gid for reserved blocks + * @ns_interval: checkpoint creation interval + * @ns_watermark: watermark for the number of dirty buffers * @ns_blocksize_bits: bit length of block size * @ns_blocksize: block size * @ns_nsegments: number of segments in filesystem @@ -90,13 +101,12 @@ struct the_nilfs { struct block_device *ns_bdev; struct rw_semaphore ns_sem; + struct mutex ns_snapshot_mount_mutex; /* * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. - * - protecting s_dirt in the super_block struct - * (see nilfs_write_super) and the following fields. */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; @@ -133,6 +143,7 @@ struct the_nilfs { u64 ns_prot_seq; u64 ns_prev_seq; + struct nilfs_sc_info *ns_writer; struct rw_semaphore ns_segctor_sem; /* @@ -147,9 +158,25 @@ struct the_nilfs { struct rb_root ns_cptree; spinlock_t ns_cptree_lock; + /* Dirty inode list */ + struct list_head ns_dirty_files; + spinlock_t ns_inode_lock; + /* GC inode list */ struct list_head ns_gc_inodes; + /* Inode allocator */ + u32 ns_next_generation; + spinlock_t ns_next_gen_lock; + + /* Mount options */ + unsigned long ns_mount_opt; + + uid_t ns_resuid; + gid_t ns_resgid; + unsigned long ns_interval; + unsigned long ns_watermark; + /* Disk layout information (static) */ unsigned int ns_blocksize_bits; unsigned int ns_blocksize; @@ -178,11 +205,24 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ } THE_NILFS_FNS(INIT, init) -THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +/* + * Mount option operations + */ +#define nilfs_clear_opt(nilfs, opt) \ + do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) +#define nilfs_set_opt(nilfs, opt) \ + do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0) +#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) +#define nilfs_write_opt(nilfs, mask, opt) \ + do { (nilfs)->ns_mount_opt = \ + (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ + NILFS_MOUNT_##opt); \ + } while (0) + /** * struct nilfs_root - nilfs root object * @cno: checkpoint number @@ -190,9 +230,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) * @count: refcount of this structure * @nilfs: nilfs object * @ifile: inode file - * @root: root inode * @inodes_count: number of inodes - * @blocks_count: number of blocks (Reserved) + * @blocks_count: number of blocks */ struct nilfs_root { __u64 cno; @@ -202,8 +241,8 @@ struct nilfs_root { struct the_nilfs *nilfs; struct inode *ifile; - atomic_t inodes_count; - atomic_t blocks_count; + atomic64_t inodes_count; + atomic64_t blocks_count; }; /* Special checkpoint number */ @@ -227,15 +266,16 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct block_device *bdev); void destroy_nilfs(struct the_nilfs *nilfs); -int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); -int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno); void nilfs_put_root(struct nilfs_root *root); -struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); void nilfs_swap_super_block(struct the_nilfs *); |
