diff options
Diffstat (limited to 'fs/nilfs2')
44 files changed, 7635 insertions, 5572 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig new file mode 100644 index 00000000000..80da8eb2739 --- /dev/null +++ b/fs/nilfs2/Kconfig @@ -0,0 +1,24 @@ +config NILFS2_FS + tristate "NILFS2 file system support" + select CRC32 + help + NILFS2 is a log-structured file system (LFS) supporting continuous + snapshotting. In addition to versioning capability of the entire + file system, users can even restore files mistakenly overwritten or + destroyed just a few seconds ago. Since this file system can keep + consistency like conventional LFS, it achieves quick recovery after + system crashes. + + NILFS2 creates a number of checkpoints every few seconds or per + synchronous write basis (unless there is no change). Users can + select significant versions among continuously created checkpoints, + and can change them into snapshots which will be preserved for long + periods until they are changed back to checkpoints. Each + snapshot is mountable as a read-only file system concurrently with + its writable mount, and this feature is convenient for online backup. + + Some features including atime, extended attributes, and POSIX ACLs, + are not supported yet. + + To compile this file system support as a module, choose M here: the + module will be called nilfs2. If unsure, say N. diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile index df3e62c1ddc..85c98737a14 100644 --- a/fs/nilfs2/Makefile +++ b/fs/nilfs2/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ btnode.o bmap.o btree.o direct.o dat.o recovery.o \ the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ - ifile.o alloc.o gcinode.o ioctl.o gcdat.o + ifile.o alloc.o gcinode.o ioctl.o diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index d69e6ae5925..741fd02e044 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -26,10 +26,16 @@ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/bitops.h> +#include <linux/slab.h> #include "mdt.h" #include "alloc.h" +/** + * nilfs_palloc_groups_per_desc_block - get the number of groups that a group + * descriptor block can maintain + * @inode: inode of metadata file using this allocator + */ static inline unsigned long nilfs_palloc_groups_per_desc_block(const struct inode *inode) { @@ -37,12 +43,21 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode) sizeof(struct nilfs_palloc_group_desc); } +/** + * nilfs_palloc_groups_count - get maximum number of groups + * @inode: inode of metadata file using this allocator + */ static inline unsigned long nilfs_palloc_groups_count(const struct inode *inode) { return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); } +/** + * nilfs_palloc_init_blockgroup - initialize private variables for allocator + * @inode: inode of metadata file using this allocator + * @entry_size: size of the persistent object + */ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); @@ -68,6 +83,12 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) return 0; } +/** + * nilfs_palloc_group - get group number and offset from an entry number + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry (e.g. inode number) + * @offset: pointer to store offset number in the group + */ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, unsigned long *offset) { @@ -77,6 +98,14 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, return group; } +/** + * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block + * @inode: inode of metadata file using this allocator + * @group: group number + * + * nilfs_palloc_desc_blkoff() returns block offset of the descriptor + * block which contains a descriptor of the specified group. + */ static unsigned long nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) { @@ -85,6 +114,14 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; } +/** + * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block + * @inode: inode of metadata file using this allocator + * @group: group number + * + * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap + * block used to allocate/deallocate entries in the specified group. + */ static unsigned long nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) { @@ -94,6 +131,12 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; } +/** + * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group + * @inode: inode of metadata file using this allocator + * @group: group number + * @desc: pointer to descriptor structure for the group + */ static unsigned long nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, const struct nilfs_palloc_group_desc *desc) @@ -106,6 +149,13 @@ nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, return nfree; } +/** + * nilfs_palloc_group_desc_add_entries - adjust count of free entries + * @inode: inode of metadata file using this allocator + * @group: group number + * @desc: pointer to descriptor structure for the group + * @n: delta to be added + */ static void nilfs_palloc_group_desc_add_entries(struct inode *inode, unsigned long group, @@ -117,6 +167,11 @@ nilfs_palloc_group_desc_add_entries(struct inode *inode, spin_unlock(nilfs_mdt_bgl_lock(inode, group)); } +/** + * nilfs_palloc_entry_blkoff - get block offset of an entry block + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry (e.g. inode number) + */ static unsigned long nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) { @@ -128,6 +183,12 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) group_offset / NILFS_MDT(inode)->mi_entries_per_block; } +/** + * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block + * @inode: inode of metadata file + * @bh: buffer head of the buffer to be initialized + * @kaddr: kernel address mapped for the page including the buffer + */ static void nilfs_palloc_desc_block_init(struct inode *inode, struct buffer_head *bh, void *kaddr) { @@ -142,31 +203,105 @@ static void nilfs_palloc_desc_block_init(struct inode *inode, } } +static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, + int create, + void (*init_block)(struct inode *, + struct buffer_head *, + void *), + struct buffer_head **bhp, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + int ret; + + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + get_bh(prev->bh); + *bhp = prev->bh; + spin_unlock(lock); + return 0; + } + spin_unlock(lock); + + ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp); + if (!ret) { + spin_lock(lock); + /* + * The following code must be safe for change of the + * cache contents during the get block call. + */ + brelse(prev->bh); + get_bh(*bhp); + prev->bh = *bhp; + prev->blkoff = blkoff; + spin_unlock(lock); + } + return ret; +} + +/** + * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block + * @inode: inode of metadata file using this allocator + * @group: group number + * @create: create flag + * @bhp: pointer to store the resultant buffer head + */ static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_desc_blkoff(inode, group), - create, nilfs_palloc_desc_block_init, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_desc_blkoff(inode, group), + create, nilfs_palloc_desc_block_init, + bhp, &cache->prev_desc, &cache->lock); } +/** + * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block + * @inode: inode of metadata file using this allocator + * @group: group number + * @create: create flag + * @bhp: pointer to store the resultant buffer head + */ static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_bitmap_blkoff(inode, group), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_bitmap_blkoff(inode, group), + create, NULL, bhp, + &cache->prev_bitmap, &cache->lock); } +/** + * nilfs_palloc_get_entry_block - get buffer head of an entry block + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry (e.g. inode number) + * @create: create flag + * @bhp: pointer to store the resultant buffer head + */ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + create, NULL, bhp, + &cache->prev_entry, &cache->lock); } +/** + * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor + * @inode: inode of metadata file using this allocator + * @group: group number + * @bh: buffer head of the buffer storing the group descriptor block + * @kaddr: kernel address mapped for the page including the buffer + */ static struct nilfs_palloc_group_desc * nilfs_palloc_block_get_group_desc(const struct inode *inode, unsigned long group, @@ -176,13 +311,13 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode, group % nilfs_palloc_groups_per_desc_block(inode); } -static unsigned char * -nilfs_palloc_block_get_bitmap(const struct inode *inode, - const struct buffer_head *bh, void *kaddr) -{ - return (unsigned char *)(kaddr + bh_offset(bh)); -} - +/** + * nilfs_palloc_block_get_entry - get kernel address of an entry + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry (e.g. inode number) + * @bh: buffer head of the buffer storing the entry block + * @kaddr: kernel address mapped for the page including the buffer + */ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, const struct buffer_head *bh, void *kaddr) { @@ -195,11 +330,19 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, entry_offset * NILFS_MDT(inode)->mi_entry_size; } +/** + * nilfs_palloc_find_available_slot - find available slot in a group + * @inode: inode of metadata file using this allocator + * @group: group number + * @target: offset number of an entry in the group (start point) + * @bitmap: bitmap of the group + * @bsize: size in bits + */ static int nilfs_palloc_find_available_slot(struct inode *inode, unsigned long group, unsigned long target, unsigned char *bitmap, - int bsize) /* size in bits */ + int bsize) { int curr, pos, end, i; @@ -237,6 +380,13 @@ static int nilfs_palloc_find_available_slot(struct inode *inode, return -ENOSPC; } +/** + * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups + * in a group descriptor block + * @inode: inode of metadata file using this allocator + * @curr: current group number + * @max: maximum number of groups + */ static unsigned long nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, unsigned long curr, unsigned long max) @@ -247,6 +397,74 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, max - curr + 1); } +/** + * nilfs_palloc_count_desc_blocks - count descriptor blocks number + * @inode: inode of metadata file using this allocator + * @desc_blocks: descriptor blocks number [out] + */ +static int nilfs_palloc_count_desc_blocks(struct inode *inode, + unsigned long *desc_blocks) +{ + unsigned long blknum; + int ret; + + ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); + if (likely(!ret)) + *desc_blocks = DIV_ROUND_UP( + blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); + return ret; +} + +/** + * nilfs_palloc_mdt_file_can_grow - check potential opportunity for + * MDT file growing + * @inode: inode of metadata file using this allocator + * @desc_blocks: known current descriptor blocks count + */ +static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, + unsigned long desc_blocks) +{ + return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < + nilfs_palloc_groups_count(inode); +} + +/** + * nilfs_palloc_count_max_entries - count max number of entries that can be + * described by descriptor blocks count + * @inode: inode of metadata file using this allocator + * @nused: current number of used entries + * @nmaxp: max number of entries [out] + */ +int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) +{ + unsigned long desc_blocks = 0; + u64 entries_per_desc_block, nmax; + int err; + + err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); + if (unlikely(err)) + return err; + + entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * + nilfs_palloc_groups_per_desc_block(inode); + nmax = entries_per_desc_block * desc_blocks; + + if (nused == nmax && + nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) + nmax += entries_per_desc_block; + + if (nused > nmax) + return -ERANGE; + + *nmaxp = nmax; + return 0; +} + +/** + * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the allocation + */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { @@ -289,8 +507,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, if (ret < 0) goto out_desc; bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( inode, group, group_offset, bitmap, entries_per_group); @@ -327,17 +544,27 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, return ret; } +/** + * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the allocation + */ void nilfs_palloc_commit_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); } +/** + * nilfs_palloc_commit_free_entry - finish deallocating a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the removal + */ void nilfs_palloc_commit_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { @@ -351,27 +578,31 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) printk(KERN_WARNING "%s: entry number %llu already freed\n", __func__, (unsigned long long)req->pr_entry_nr); - - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + else + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); } +/** + * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the allocation + */ void nilfs_palloc_abort_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { @@ -385,14 +616,13 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) - printk(KERN_WARNING "%s: entry numer %llu already freed\n", + printk(KERN_WARNING "%s: entry number %llu already freed\n", __func__, (unsigned long long)req->pr_entry_nr); - - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + else + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -405,6 +635,11 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, req->pr_desc_bh = NULL; } +/** + * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the removal + */ int nilfs_palloc_prepare_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { @@ -427,6 +662,11 @@ int nilfs_palloc_prepare_free_entry(struct inode *inode, return 0; } +/** + * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object + * @inode: inode of metadata file using this allocator + * @req: nilfs_palloc_req structure exchanged for the removal + */ void nilfs_palloc_abort_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { @@ -438,6 +678,12 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, req->pr_desc_bh = NULL; } +/** + * nilfs_palloc_group_is_in - judge if an entry is in a group + * @inode: inode of metadata file using this allocator + * @group: group number + * @nr: serial number of the entry (e.g. inode number) + */ static int nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) { @@ -448,6 +694,12 @@ nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) return (nr >= first) && (nr <= last); } +/** + * nilfs_palloc_freev - deallocate a set of persistent objects + * @inode: inode of metadata file using this allocator + * @entry_nrs: array of entry numbers to be deallocated + * @nitems: number of entries stored in @entry_nrs + */ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) { struct buffer_head *desc_bh, *bitmap_bh; @@ -457,7 +709,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned long group, group_offset; int i, j, n, ret; - for (i = 0; i < nitems; i += n) { + for (i = 0; i < nitems; i = j) { group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) @@ -472,12 +724,11 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); for (j = i, n = 0; (j < nitems) && nilfs_palloc_group_is_in(inode, group, entry_nrs[j]); - j++, n++) { + j++) { nilfs_palloc_group(inode, entry_nrs[j], &group_offset); if (!nilfs_clear_bit_atomic( nilfs_mdt_bgl_lock(inode, group), @@ -486,6 +737,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) "%s: entry number %llu already freed\n", __func__, (unsigned long long)entry_nrs[j]); + } else { + n++; } } nilfs_palloc_group_desc_add_entries(inode, group, desc, n); @@ -493,8 +746,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) kunmap(bitmap_bh->b_page); kunmap(desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(desc_bh); - nilfs_mdt_mark_buffer_dirty(bitmap_bh); + mark_buffer_dirty(desc_bh); + mark_buffer_dirty(bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(bitmap_bh); @@ -502,3 +755,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) } return 0; } + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache) +{ + NILFS_MDT(inode)->mi_palloc_cache = cache; + spin_lock_init(&cache->lock); +} + +void nilfs_palloc_clear_cache(struct inode *inode) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + spin_lock(&cache->lock); + brelse(cache->prev_desc.bh); + brelse(cache->prev_bitmap.bh); + brelse(cache->prev_entry.bh); + cache->prev_desc.bh = NULL; + cache->prev_bitmap.bh = NULL; + cache->prev_entry.bh = NULL; + spin_unlock(&cache->lock); +} + +void nilfs_palloc_destroy_cache(struct inode *inode) +{ + nilfs_palloc_clear_cache(inode); + NILFS_MDT(inode)->mi_palloc_cache = NULL; +} diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4ace5475c2c..4bd6451b570 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -29,6 +29,13 @@ #include <linux/buffer_head.h> #include <linux/fs.h> +/** + * nilfs_palloc_entries_per_group - get the number of entries per group + * @inode: inode of metadata file using this allocator + * + * The number of entries per group is defined by the number of bits + * that a bitmap block can maintain. + */ static inline unsigned long nilfs_palloc_entries_per_group(const struct inode *inode) { @@ -41,8 +48,10 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, void *nilfs_palloc_block_get_entry(const struct inode *, __u64, const struct buffer_head *, void *); +int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); + /** - * nilfs_palloc_req - persistent alloctor request and reply + * nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap @@ -67,6 +76,35 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic -#define nilfs_find_next_zero_bit ext2_find_next_zero_bit +#define nilfs_find_next_zero_bit find_next_zero_bit_le + +/** + * struct nilfs_bh_assoc - block offset and buffer head association + * @blkoff: block offset + * @bh: buffer head + */ +struct nilfs_bh_assoc { + unsigned long blkoff; + struct buffer_head *bh; +}; + +/** + * struct nilfs_palloc_cache - persistent object allocator cache + * @lock: cache protecting lock + * @prev_desc: blockgroup descriptors cache + * @prev_bitmap: blockgroup bitmap cache + * @prev_entry: translation entries cache + */ +struct nilfs_palloc_cache { + spinlock_t lock; + struct nilfs_bh_assoc prev_desc; + struct nilfs_bh_assoc prev_bitmap; + struct nilfs_bh_assoc prev_entry; +}; + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache); +void nilfs_palloc_clear_cache(struct inode *inode); +void nilfs_palloc_destroy_cache(struct inode *inode); #endif /* _NILFS_ALLOC_H */ diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 064279e33bb..aadbd0b5e3e 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -25,46 +25,45 @@ #include <linux/errno.h> #include "nilfs.h" #include "bmap.h" -#include "sb.h" +#include "btree.h" +#include "direct.h" #include "btnode.h" #include "mdt.h" #include "dat.h" #include "alloc.h" -int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, - __u64 *ptrp) +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { - __u64 ptr; - int ret; - - down_read(&bmap->b_sem); - ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); - if (ret < 0) - goto out; - if (bmap->b_pops->bpop_translate != NULL) { - ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); - if (ret < 0) - goto out; - *ptrp = ptr; - } + struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info; - out: - up_read(&bmap->b_sem); - return ret; + return nilfs->ns_dat; } +static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, + const char *fname, int err) +{ + struct inode *inode = bmap->b_inode; + + if (err == -EINVAL) { + nilfs_error(inode->i_sb, fname, + "broken bmap (inode number=%lu)\n", inode->i_ino); + err = -EIO; + } + return err; +} /** - * nilfs_bmap_lookup - find a record + * nilfs_bmap_lookup_at_level - find a data block or node block * @bmap: bmap * @key: key - * @recp: pointer to record + * @level: level + * @ptrp: place to store the value associated to @key * - * Description: nilfs_bmap_lookup() finds a record whose key matches @key in - * @bmap. + * Description: nilfs_bmap_lookup_at_level() finds a record whose key + * matches @key in the block at @level of the bmap. * * Return Value: On success, 0 is returned and the record associated with @key - * is stored in the place pointed by @recp. On error, one of the following + * is stored in the place pointed by @ptrp. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. @@ -73,20 +72,42 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, * * %-ENOENT - A record associated with @key does not exist. */ -int nilfs_bmap_lookup(struct nilfs_bmap *bmap, - unsigned long key, - unsigned long *recp) +int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, + __u64 *ptrp) { - __u64 ptr; + sector_t blocknr; int ret; - /* XXX: use macro for level 1 */ - ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr); - if (recp != NULL) - *recp = ptr; + down_read(&bmap->b_sem); + ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); + if (ret < 0) { + ret = nilfs_bmap_convert_error(bmap, __func__, ret); + goto out; + } + if (NILFS_BMAP_USE_VBN(bmap)) { + ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, + &blocknr); + if (!ret) + *ptrp = blocknr; + } + + out: + up_read(&bmap->b_sem); return ret; } +int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + int ret; + + down_read(&bmap->b_sem); + ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); + up_read(&bmap->b_sem); + + return nilfs_bmap_convert_error(bmap, __func__, ret); +} + static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; @@ -101,8 +122,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (n < 0) return n; ret = nilfs_btree_convert_and_insert( - bmap, key, ptr, keys, ptrs, n, - NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH); + bmap, key, ptr, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags |= NILFS_BMAP_LARGE; @@ -141,7 +161,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = nilfs_bmap_do_insert(bmap, key, rec); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) @@ -158,8 +179,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) if (n < 0) return n; ret = nilfs_direct_delete_and_convert( - bmap, key, keys, ptrs, n, - NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH); + bmap, key, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; @@ -178,9 +198,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) down_read(&bmap->b_sem); ret = bmap->b_ops->bop_last_key(bmap, &lastkey); - if (!ret) - *key = lastkey; up_read(&bmap->b_sem); + + if (ret < 0) + ret = nilfs_bmap_convert_error(bmap, __func__, ret); + else + *key = lastkey; return ret; } @@ -208,7 +231,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_delete(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) @@ -259,7 +283,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_truncate(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -298,7 +323,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -342,7 +368,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -371,7 +398,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -398,57 +426,6 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) /* * Internal use only */ - -void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) -{ - inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else - mark_inode_dirty(bmap->b_inode); -} - -void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) -{ - inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else - mark_inode_dirty(bmap->b_inode); -} - -int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 0); -} - -void nilfs_bmap_put_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - brelse(bh); -} - -int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - int ret; - - ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 1); - if (ret < 0) - return ret; - set_buffer_nilfs_volatile(*bhp); - return 0; -} - -void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - nilfs_btnode_delete(bh); -} - __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { @@ -457,8 +434,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; - pbh = pbh->b_this_page, key++); + for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) + key++; return key; } @@ -476,11 +453,6 @@ __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) return NILFS_BMAP_INVALID_PTR; } -static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) -{ - return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); -} - #define NILFS_BMAP_GROUP_DIV 8 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) { @@ -493,202 +465,8 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) (entries_per_group / NILFS_BMAP_GROUP_DIV); } -static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req, - sector_t blocknr) -{ - nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req, - blocknr); -} - -static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0); -} - -static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); -} - -static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr, - sector_t blocknr) -{ - return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr); -} - -int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) -{ - return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); -} - -int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) -{ - int ret; - - ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); - if (ret < 0) - return ret; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); - if (ret < 0) - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); - - return ret; -} - -void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) -{ - bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq); -} - -void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) -{ - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq); -} - -static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, - __u64 *ptrp) -{ - sector_t blocknr; - int ret; - - ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); - if (ret < 0) - return ret; - if (ptrp != NULL) - *ptrp = blocknr; - return 0; -} - -static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* ignore target ptr */ - req->bpr_ptr = bmap->b_last_allocated_ptr++; - return 0; -} - -static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* do nothing */ -} - -static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - bmap->b_last_allocated_ptr--; -} - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_v, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { - .bpop_prepare_alloc_ptr = NULL, - .bpop_commit_alloc_ptr = NULL, - .bpop_abort_alloc_ptr = NULL, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - static struct lock_class_key nilfs_bmap_dat_lock_key; +static struct lock_class_key nilfs_bmap_mdt_lock_key; /** * nilfs_bmap_read - read a bmap from an inode @@ -714,31 +492,30 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; switch (bmap->b_inode->i_ino) { case NILFS_DAT_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_p; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_P; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VS; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); break; + case NILFS_IFILE_INO: + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); + /* Fall through */ default: - bmap->b_pops = &nilfs_bmap_ptr_ops_v; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VM; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; } return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? - nilfs_btree_init(bmap, - NILFS_BMAP_LARGE_LOW, - NILFS_BMAP_LARGE_HIGH) : - nilfs_direct_init(bmap, - NILFS_BMAP_SMALL_LOW, - NILFS_BMAP_SMALL_HIGH); + nilfs_btree_init(bmap) : nilfs_direct_init(bmap); } /** @@ -764,25 +541,27 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; - bmap->b_pops = &nilfs_bmap_ptr_ops_gc; + bmap->b_ptr_type = NILFS_BMAP_PTR_U; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; bmap->b_state = 0; nilfs_btree_init_gc(bmap); } -void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_save(const struct nilfs_bmap *bmap, + struct nilfs_bmap_store *store) { - memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); - init_rwsem(&gcbmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; + memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); + store->last_allocated_key = bmap->b_last_allocated_key; + store->last_allocated_ptr = bmap->b_last_allocated_ptr; + store->state = bmap->b_state; } -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_restore(struct nilfs_bmap *bmap, + const struct nilfs_bmap_store *store) { - memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); - init_rwsem(&bmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; + memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); + bmap->b_last_allocated_key = store->last_allocated_key; + bmap->b_last_allocated_ptr = store->last_allocated_ptr; + bmap->b_state = store->state; } diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 4f2708abb1b..b89e68076ad 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -28,14 +28,10 @@ #include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> #include "alloc.h" +#include "dat.h" #define NILFS_BMAP_INVALID_PTR 0 -#define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) -#define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) -#define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) -#define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) - #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) @@ -64,11 +60,13 @@ struct nilfs_bmap_stats { */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); + int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *, + unsigned); int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); int (*bop_delete)(struct nilfs_bmap *, __u64); void (*bop_clear)(struct nilfs_bmap *); - int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); + int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *); void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, struct list_head *); @@ -86,34 +84,6 @@ struct nilfs_bmap_operations { }; -/** - * struct nilfs_bmap_ptr_operations - bmap ptr operation table - */ -struct nilfs_bmap_ptr_operations { - int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - sector_t); - void (*bpop_abort_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - - int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); -}; - - #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) #define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) #define NILFS_BMAP_NEW_PTR_INIT \ @@ -131,12 +101,11 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) * @b_sem: semaphore * @b_inode: owner of bmap * @b_ops: bmap operation table - * @b_pops: bmap ptr operation table - * @b_low: low watermark of conversion - * @b_high: high watermark of conversion * @b_last_allocated_key: last allocated key for data block * @b_last_allocated_ptr: last allocated ptr for data block + * @b_ptr_type: pointer type * @b_state: state + * @b_nchildren_per_block: maximum number of child nodes for non-root nodes */ struct nilfs_bmap { union { @@ -146,22 +115,44 @@ struct nilfs_bmap { struct rw_semaphore b_sem; struct inode *b_inode; const struct nilfs_bmap_operations *b_ops; - const struct nilfs_bmap_ptr_operations *b_pops; - __u64 b_low; - __u64 b_high; __u64 b_last_allocated_key; __u64 b_last_allocated_ptr; + int b_ptr_type; int b_state; + __u16 b_nchildren_per_block; }; +/* pointer type */ +#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */ +#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single + version) */ +#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple + versions) */ +#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */ + +#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0) + /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +/** + * struct nilfs_bmap_store - shadow copy of bmap state + * @data: cached raw block mapping of on-disk inode + * @last_allocated_key: cached value of last allocated key for data block + * @last_allocated_ptr: cached value of last allocated ptr for data block + * @state: cached value of state field of bmap structure + */ +struct nilfs_bmap_store { + __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; + __u64 last_allocated_key; + __u64 last_allocated_ptr; + int state; +}; int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); -int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); +int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); @@ -175,17 +166,80 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); void nilfs_bmap_init_gc(struct nilfs_bmap *); -void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); +void nilfs_bmap_save(const struct nilfs_bmap *, struct nilfs_bmap_store *); +void nilfs_bmap_restore(struct nilfs_bmap *, const struct nilfs_bmap_store *); + +static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, + __u64 *ptr) +{ + return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr); +} /* * Internal use only */ +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); + +static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + if (dat) + return nilfs_dat_prepare_alloc(dat, &req->bpr_req); + /* ignore target ptr */ + req->bpr_ptr = bmap->b_last_allocated_ptr++; + return 0; +} -int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); -int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); +static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + if (dat) + nilfs_dat_commit_alloc(dat, &req->bpr_req); +} +static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + if (dat) + nilfs_dat_abort_alloc(dat, &req->bpr_req); + else + bmap->b_last_allocated_ptr--; +} + +static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + return dat ? nilfs_dat_prepare_end(dat, &req->bpr_req) : 0; +} + +static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + if (dat) + nilfs_dat_commit_end(dat, &req->bpr_req, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); +} + +static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + struct inode *dat) +{ + if (dat) + nilfs_dat_abort_end(dat, &req->bpr_req); +} + +static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key, + __u64 ptr) +{ + bmap->b_last_allocated_key = key; + bmap->b_last_allocated_ptr = ptr; +} __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, const struct buffer_head *); @@ -193,27 +247,6 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); -int nilfs_bmap_prepare_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_commit_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_abort_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); - -void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); -void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); - - -int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); -int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); - /* Assume that bmap semaphore is locked. */ static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h deleted file mode 100644 index d41509bff47..00000000000 --- a/fs/nilfs2/bmap_union.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * bmap_union.h - NILFS block mapping. - * - * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. - */ - -#ifndef _NILFS_BMAP_UNION_H -#define _NILFS_BMAP_UNION_H - -#include "bmap.h" -#include "direct.h" -#include "btree.h" - -/** - * nilfs_bmap_union - - * @bi_bmap: bmap structure - * @bi_btree: direct map structure - * @bi_direct: B-tree structure - */ -union nilfs_bmap_union { - struct nilfs_bmap bi_bmap; - struct nilfs_direct bi_direct; - struct nilfs_btree bi_btree; -}; - -#endif /* _NILFS_BMAP_UNION_H */ diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4cc07b2c30e..a35ae35e693 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -27,49 +27,52 @@ #include <linux/buffer_head.h> #include <linux/mm.h> #include <linux/backing-dev.h> +#include <linux/gfp.h> #include "nilfs.h" #include "mdt.h" #include "dat.h" #include "page.h" #include "btnode.h" - -void nilfs_btnode_cache_init_once(struct address_space *btnc) +void nilfs_btnode_cache_clear(struct address_space *btnc) { - INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); - spin_lock_init(&btnc->tree_lock); - INIT_LIST_HEAD(&btnc->private_list); - spin_lock_init(&btnc->private_lock); - - spin_lock_init(&btnc->i_mmap_lock); - INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); - INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); + invalidate_mapping_pages(btnc, 0, -1); + truncate_inode_pages(btnc, 0); } -static struct address_space_operations def_btnode_aops; - -void nilfs_btnode_cache_init(struct address_space *btnc) +struct buffer_head * +nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - btnc->host = NULL; /* can safely set to host inode ? */ - btnc->flags = 0; - mapping_set_gfp_mask(btnc, GFP_NOFS); - btnc->assoc_mapping = NULL; - btnc->backing_dev_info = &default_backing_dev_info; - btnc->a_ops = &def_btnode_aops; -} + struct inode *inode = NILFS_BTNC_I(btnc); + struct buffer_head *bh; -void nilfs_btnode_cache_clear(struct address_space *btnc) -{ - invalidate_mapping_pages(btnc, 0, -1); - truncate_inode_pages(btnc, 0); + bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + if (unlikely(!bh)) + return NULL; + + if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || + buffer_dirty(bh))) { + brelse(bh); + BUG(); + } + memset(bh->b_data, 0, 1 << inode->i_blkbits); + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = blocknr; + set_buffer_mapped(bh); + set_buffer_uptodate(bh); + + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + return bh; } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, - int newblk) + sector_t pblocknr, int mode, + struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; struct inode *inode = NILFS_BTNC_I(btnc); + struct page *page; int err; bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); @@ -77,18 +80,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ - if (newblk) { - if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || - buffer_dirty(bh))) { - brelse(bh); - BUG(); - } - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; - bh->b_blocknr = blocknr; - set_buffer_mapped(bh); - set_buffer_uptodate(bh); - goto found; - } + page = bh->b_page; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -96,61 +88,50 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, if (pblocknr == 0) { pblocknr = blocknr; if (inode->i_ino != NILFS_DAT_INO) { - struct inode *dat = - nilfs_dat_inode(NILFS_I_NILFS(inode)); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; /* blocknr is a virtual block number */ - err = nilfs_dat_translate(dat, blocknr, &pblocknr); + err = nilfs_dat_translate(nilfs->ns_dat, blocknr, + &pblocknr); if (unlikely(err)) { brelse(bh); goto out_locked; } } } - lock_buffer(bh); + + if (mode == READA) { + if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { + err = -EBUSY; /* internal code */ + brelse(bh); + goto out_locked; + } + } else { /* mode == READ */ + lock_buffer(bh); + } if (buffer_uptodate(bh)) { unlock_buffer(bh); err = -EEXIST; /* internal code */ goto found; } set_buffer_mapped(bh); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ, bh); + submit_bh(mode, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ + *submit_ptr = pblocknr; err = 0; found: *pbh = bh; out_locked: - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + unlock_page(page); + page_cache_release(page); return err; } -int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, int newblk) -{ - struct buffer_head *bh; - int err; - - err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); - if (err == -EEXIST) /* internal code (cache hit) */ - return 0; - if (unlikely(err)) - return err; - - bh = *pbh; - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - return -EIO; - } - return 0; -} - /** * nilfs_btnode_delete - delete B-tree node buffer * @bh: buffer to be deleted @@ -206,6 +187,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, * We cannot call radix_tree_preload for the kernels older * than 2.6.23, because it is not exported for modules. */ +retry: err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); if (err) goto failed_unlock; @@ -216,7 +198,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, (unsigned long long)oldkey, (unsigned long long)newkey); -retry: spin_lock_irq(&btnc->tree_lock); err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); spin_unlock_irq(&btnc->tree_lock); @@ -239,12 +220,13 @@ retry: unlock_page(obh->b_page); } - err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); - if (likely(!err)) { - BUG_ON(nbh == obh); - ctxt->newbh = nbh; - } - return err; + nbh = nilfs_btnode_create_block(btnc, newkey); + if (!nbh) + return -ENOMEM; + + BUG_ON(nbh == obh); + ctxt->newbh = nbh; + return 0; failed_unlock: unlock_page(obh->b_page); @@ -272,8 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); - if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage)) - BUG(); + mark_buffer_dirty(obh); spin_lock_irq(&btnc->tree_lock); radix_tree_delete(&btnc->page_tree, oldkey); @@ -285,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, unlock_page(opage); } else { nilfs_copy_buffer(nbh, obh); - nilfs_btnode_mark_dirty(nbh); + mark_buffer_dirty(nbh); nbh->b_blocknr = newkey; ctxt->bh = nbh; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 35faa86444a..d876b565ce6 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -29,7 +29,13 @@ #include <linux/fs.h> #include <linux/backing-dev.h> - +/** + * struct nilfs_btnode_chkey_ctxt - change key context + * @oldkey: old key of block's moving content + * @newkey: new key for block's content + * @bh: buffer head of old buffer + * @newbh: buffer head of new buffer + */ struct nilfs_btnode_chkey_ctxt { __u64 oldkey; __u64 newkey; @@ -37,13 +43,11 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; -void nilfs_btnode_cache_init_once(struct address_space *); -void nilfs_btnode_cache_init(struct address_space *); void nilfs_btnode_cache_clear(struct address_space *); -int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, - struct buffer_head **, int); -int nilfs_btnode_get(struct address_space *, __u64, sector_t, - struct buffer_head **, int); +struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, + __u64 blocknr); +int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, + struct buffer_head **, sector_t *); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); @@ -52,7 +56,4 @@ void nilfs_btnode_commit_change_key(struct address_space *, void nilfs_btnode_abort_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); -#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) - - #endif /* _NILFS_BTNODE_H */ diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 6b37a276729..b2e3ff34762 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -29,68 +29,18 @@ #include "btnode.h" #include "btree.h" #include "alloc.h" +#include "dat.h" -/** - * struct nilfs_btree_path - A path on which B-tree operations are executed - * @bp_bh: buffer head of node block - * @bp_sib_bh: buffer head of sibling node block - * @bp_index: index of child node - * @bp_oldreq: ptr end request for old ptr - * @bp_newreq: ptr alloc request for new ptr - * @bp_op: rebalance operation - */ -struct nilfs_btree_path { - struct buffer_head *bp_bh; - struct buffer_head *bp_sib_bh; - int bp_index; - union nilfs_bmap_ptr_req bp_oldreq; - union nilfs_bmap_ptr_req bp_newreq; - struct nilfs_btnode_chkey_ctxt bp_ctxt; - void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *, - int, __u64 *, __u64 *); -}; - -/* - * B-tree path operations - */ - -static struct kmem_cache *nilfs_btree_path_cache; - -int __init nilfs_btree_path_cache_init(void) -{ - nilfs_btree_path_cache = - kmem_cache_create("nilfs2_btree_path_cache", - sizeof(struct nilfs_btree_path) * - NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); - return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM; -} - -void nilfs_btree_path_cache_destroy(void) -{ - kmem_cache_destroy(nilfs_btree_path_cache); -} - -static inline struct nilfs_btree_path * -nilfs_btree_alloc_path(const struct nilfs_btree *btree) +static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { - return (struct nilfs_btree_path *) - kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); -} - -static inline void nilfs_btree_free_path(const struct nilfs_btree *btree, - struct nilfs_btree_path *path) -{ - kmem_cache_free(nilfs_btree_path_cache, path); -} + struct nilfs_btree_path *path; + int level = NILFS_BTREE_LEVEL_DATA; -static void nilfs_btree_init_path(const struct nilfs_btree *btree, - struct nilfs_btree_path *path) -{ - int level; + path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); + if (path == NULL) + goto out; - for (level = NILFS_BTREE_LEVEL_DATA; - level < NILFS_BTREE_LEVEL_MAX; - level++) { + for (; level < NILFS_BTREE_LEVEL_MAX; level++) { path[level].bp_bh = NULL; path[level].bp_sib_bh = NULL; path[level].bp_index = 0; @@ -98,201 +48,163 @@ static void nilfs_btree_init_path(const struct nilfs_btree *btree, path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; path[level].bp_op = NULL; } + +out: + return path; } -static void nilfs_btree_clear_path(const struct nilfs_btree *btree, - struct nilfs_btree_path *path) +static void nilfs_btree_free_path(struct nilfs_btree_path *path) { - int level; + int level = NILFS_BTREE_LEVEL_DATA; - for (level = NILFS_BTREE_LEVEL_DATA; - level < NILFS_BTREE_LEVEL_MAX; - level++) { - if (path[level].bp_bh != NULL) { - nilfs_bmap_put_block(&btree->bt_bmap, - path[level].bp_bh); - path[level].bp_bh = NULL; - } - /* sib_bh is released or deleted by prepare or commit - * operations. */ - path[level].bp_sib_bh = NULL; - path[level].bp_index = 0; - path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; - path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; - path[level].bp_op = NULL; - } -} + for (; level < NILFS_BTREE_LEVEL_MAX; level++) + brelse(path[level].bp_bh); + kmem_cache_free(nilfs_btree_path_cache, path); +} /* * B-tree node operations */ +static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, + __u64 ptr, struct buffer_head **bhp) +{ + struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct buffer_head *bh; + + bh = nilfs_btnode_create_block(btnc, ptr); + if (!bh) + return -ENOMEM; + + set_buffer_nilfs_volatile(bh); + *bhp = bh; + return 0; +} -static inline int -nilfs_btree_node_get_flags(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) { return node->bn_flags; } -static inline void -nilfs_btree_node_set_flags(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int flags) +static void +nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) { node->bn_flags = flags; } -static inline int nilfs_btree_node_root(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_node_root(const struct nilfs_btree_node *node) { - return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT; + return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; } -static inline int -nilfs_btree_node_get_level(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) { return node->bn_level; } -static inline void -nilfs_btree_node_set_level(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int level) +static void +nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) { node->bn_level = level; } -static inline int -nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) { return le16_to_cpu(node->bn_nchildren); } -static inline void -nilfs_btree_node_set_nchildren(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int nchildren) +static void +nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) { node->bn_nchildren = cpu_to_le16(nchildren); } -static inline int -nilfs_btree_node_size(const struct nilfs_btree *btree) -{ - return 1 << btree->bt_bmap.b_inode->i_blkbits; -} - -static inline int -nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { - return nilfs_btree_node_root(btree, node) ? - NILFS_BTREE_ROOT_NCHILDREN_MIN : - NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); + return 1 << btree->b_inode->i_blkbits; } -static inline int -nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) { - return nilfs_btree_node_root(btree, node) ? - NILFS_BTREE_ROOT_NCHILDREN_MAX : - NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); + return btree->b_nchildren_per_block; } -static inline __le64 * -nilfs_btree_node_dkeys(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static __le64 * +nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) { return (__le64 *)((char *)(node + 1) + - (nilfs_btree_node_root(btree, node) ? + (nilfs_btree_node_root(node) ? 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); } -static inline __le64 * -nilfs_btree_node_dptrs(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node) +static __le64 * +nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) { - return (__le64 *)(nilfs_btree_node_dkeys(btree, node) + - nilfs_btree_node_nchildren_max(btree, node)); + return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); } -static inline __u64 -nilfs_btree_node_get_key(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node, int index) +static __u64 +nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) { - return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + - index)); + return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); } -static inline void -nilfs_btree_node_set_key(struct nilfs_btree *btree, - struct nilfs_btree_node *node, int index, __u64 key) +static void +nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) { - *(nilfs_btree_node_dkeys(btree, node) + index) = - nilfs_bmap_key_to_dkey(key); + *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); } -static inline __u64 -nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node, - int index) +static __u64 +nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, + int ncmax) { - return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + - index)); + return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); } -static inline void -nilfs_btree_node_set_ptr(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int index, - __u64 ptr) +static void +nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, + int ncmax) { - *(nilfs_btree_node_dptrs(btree, node) + index) = - nilfs_bmap_ptr_to_dptr(ptr); + *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); } -static void nilfs_btree_node_init(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int flags, int level, int nchildren, +static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, + int level, int nchildren, int ncmax, const __u64 *keys, const __u64 *ptrs) { __le64 *dkeys; __le64 *dptrs; int i; - nilfs_btree_node_set_flags(btree, node, flags); - nilfs_btree_node_set_level(btree, node, level); - nilfs_btree_node_set_nchildren(btree, node, nchildren); + nilfs_btree_node_set_flags(node, flags); + nilfs_btree_node_set_level(node, level); + nilfs_btree_node_set_nchildren(node, nchildren); - dkeys = nilfs_btree_node_dkeys(btree, node); - dptrs = nilfs_btree_node_dptrs(btree, node); + dkeys = nilfs_btree_node_dkeys(node); + dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nchildren; i++) { - dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); - dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); + dkeys[i] = cpu_to_le64(keys[i]); + dptrs[i] = cpu_to_le64(ptrs[i]); } } /* Assume the buffer heads corresponding to left and right are locked. */ -static void nilfs_btree_node_move_left(struct nilfs_btree *btree, - struct nilfs_btree_node *left, +static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, struct nilfs_btree_node *right, - int n) + int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; - ldkeys = nilfs_btree_node_dkeys(btree, left); - ldptrs = nilfs_btree_node_dptrs(btree, left); - lnchildren = nilfs_btree_node_get_nchildren(btree, left); + ldkeys = nilfs_btree_node_dkeys(left); + ldptrs = nilfs_btree_node_dptrs(left, lncmax); + lnchildren = nilfs_btree_node_get_nchildren(left); - rdkeys = nilfs_btree_node_dkeys(btree, right); - rdptrs = nilfs_btree_node_dptrs(btree, right); - rnchildren = nilfs_btree_node_get_nchildren(btree, right); + rdkeys = nilfs_btree_node_dkeys(right); + rdptrs = nilfs_btree_node_dptrs(right, rncmax); + rnchildren = nilfs_btree_node_get_nchildren(right); memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); @@ -301,27 +213,26 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree, lnchildren += n; rnchildren -= n; - nilfs_btree_node_set_nchildren(btree, left, lnchildren); - nilfs_btree_node_set_nchildren(btree, right, rnchildren); + nilfs_btree_node_set_nchildren(left, lnchildren); + nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer heads corresponding to left and right are locked. */ -static void nilfs_btree_node_move_right(struct nilfs_btree *btree, - struct nilfs_btree_node *left, +static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, struct nilfs_btree_node *right, - int n) + int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; - ldkeys = nilfs_btree_node_dkeys(btree, left); - ldptrs = nilfs_btree_node_dptrs(btree, left); - lnchildren = nilfs_btree_node_get_nchildren(btree, left); + ldkeys = nilfs_btree_node_dkeys(left); + ldptrs = nilfs_btree_node_dptrs(left, lncmax); + lnchildren = nilfs_btree_node_get_nchildren(left); - rdkeys = nilfs_btree_node_dkeys(btree, right); - rdptrs = nilfs_btree_node_dptrs(btree, right); - rnchildren = nilfs_btree_node_get_nchildren(btree, right); + rdkeys = nilfs_btree_node_dkeys(right); + rdptrs = nilfs_btree_node_dptrs(right, rncmax); + rnchildren = nilfs_btree_node_get_nchildren(right); memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); @@ -330,38 +241,36 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree, lnchildren -= n; rnchildren += n; - nilfs_btree_node_set_nchildren(btree, left, lnchildren); - nilfs_btree_node_set_nchildren(btree, right, rnchildren); + nilfs_btree_node_set_nchildren(left, lnchildren); + nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer head corresponding to node is locked. */ -static void nilfs_btree_node_insert(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - __u64 key, __u64 ptr, int index) +static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, + __u64 key, __u64 ptr, int ncmax) { __le64 *dkeys; __le64 *dptrs; int nchildren; - dkeys = nilfs_btree_node_dkeys(btree, node); - dptrs = nilfs_btree_node_dptrs(btree, node); - nchildren = nilfs_btree_node_get_nchildren(btree, node); + dkeys = nilfs_btree_node_dkeys(node); + dptrs = nilfs_btree_node_dptrs(node, ncmax); + nchildren = nilfs_btree_node_get_nchildren(node); if (index < nchildren) { memmove(dkeys + index + 1, dkeys + index, (nchildren - index) * sizeof(*dkeys)); memmove(dptrs + index + 1, dptrs + index, (nchildren - index) * sizeof(*dptrs)); } - dkeys[index] = nilfs_bmap_key_to_dkey(key); - dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); + dkeys[index] = cpu_to_le64(key); + dptrs[index] = cpu_to_le64(ptr); nchildren++; - nilfs_btree_node_set_nchildren(btree, node, nchildren); + nilfs_btree_node_set_nchildren(node, nchildren); } /* Assume that the buffer head corresponding to node is locked. */ -static void nilfs_btree_node_delete(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - __u64 *keyp, __u64 *ptrp, int index) +static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, + __u64 *keyp, __u64 *ptrp, int ncmax) { __u64 key; __u64 ptr; @@ -369,11 +278,11 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, __le64 *dptrs; int nchildren; - dkeys = nilfs_btree_node_dkeys(btree, node); - dptrs = nilfs_btree_node_dptrs(btree, node); - key = nilfs_bmap_dkey_to_key(dkeys[index]); - ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); - nchildren = nilfs_btree_node_get_nchildren(btree, node); + dkeys = nilfs_btree_node_dkeys(node); + dptrs = nilfs_btree_node_dptrs(node, ncmax); + key = le64_to_cpu(dkeys[index]); + ptr = le64_to_cpu(dptrs[index]); + nchildren = nilfs_btree_node_get_nchildren(node); if (keyp != NULL) *keyp = key; if (ptrp != NULL) @@ -386,11 +295,10 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, (nchildren - index - 1) * sizeof(*dptrs)); } nchildren--; - nilfs_btree_node_set_nchildren(btree, node, nchildren); + nilfs_btree_node_set_nchildren(node, nchildren); } -static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node, +static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, __u64 key, int *indexp) { __u64 nkey; @@ -398,12 +306,12 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, /* binary search */ low = 0; - high = nilfs_btree_node_get_nchildren(btree, node) - 1; + high = nilfs_btree_node_get_nchildren(node) - 1; index = 0; s = 0; while (low <= high) { index = (low + high) / 2; - nkey = nilfs_btree_node_get_key(btree, node, index); + nkey = nilfs_btree_node_get_key(node, index); if (nkey == key) { s = 0; goto out; @@ -417,9 +325,8 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, } /* adjust index */ - if (nilfs_btree_node_get_level(btree, node) > - NILFS_BTREE_LEVEL_NODE_MIN) { - if ((s > 0) && (index > 0)) + if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { + if (s > 0 && index > 0) index--; } else if (s < 0) index++; @@ -430,78 +337,218 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, return s == 0; } -static inline struct nilfs_btree_node * -nilfs_btree_get_root(const struct nilfs_btree *btree) +/** + * nilfs_btree_node_broken - verify consistency of btree node + * @node: btree node block to be examined + * @size: node size (in bytes) + * @blocknr: block number + * + * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + */ +static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, + size_t size, sector_t blocknr) +{ + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX || + (flags & NILFS_BTREE_NODE_ROOT) || + nchildren < 0 || + nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { + printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " + "level = %d, flags = 0x%x, nchildren = %d\n", + (unsigned long long)blocknr, level, flags, nchildren); + ret = 1; + } + return ret; +} + +int nilfs_btree_broken_node_block(struct buffer_head *bh) { - return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data; + int ret; + + if (buffer_nilfs_checked(bh)) + return 0; + + ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, + bh->b_size, bh->b_blocknr); + if (likely(!ret)) + set_buffer_nilfs_checked(bh); + return ret; } -static inline struct nilfs_btree_node * -nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree, - const struct nilfs_btree_path *path, - int level) +static struct nilfs_btree_node * +nilfs_btree_get_root(const struct nilfs_bmap *btree) +{ + return (struct nilfs_btree_node *)btree->b_u.u_data; +} + +static struct nilfs_btree_node * +nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_bh->b_data; } -static inline struct nilfs_btree_node * -nilfs_btree_get_sib_node(const struct nilfs_btree *btree, - const struct nilfs_btree_path *path, - int level) +static struct nilfs_btree_node * +nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; } -static inline int nilfs_btree_height(const struct nilfs_btree *btree) +static int nilfs_btree_height(const struct nilfs_bmap *btree) { - return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree)) - + 1; + return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; } -static inline struct nilfs_btree_node * -nilfs_btree_get_node(const struct nilfs_btree *btree, +static struct nilfs_btree_node * +nilfs_btree_get_node(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, - int level) + int level, int *ncmaxp) +{ + struct nilfs_btree_node *node; + + if (level == nilfs_btree_height(btree) - 1) { + node = nilfs_btree_get_root(btree); + *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; + } else { + node = nilfs_btree_get_nonroot_node(path, level); + *ncmaxp = nilfs_btree_nchildren_per_block(btree); + } + return node; +} + +static int +nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) { - return (level == nilfs_btree_height(btree) - 1) ? - nilfs_btree_get_root(btree) : - nilfs_btree_get_nonroot_node(btree, path, level); + if (unlikely(nilfs_btree_node_get_level(node) != level)) { + dump_stack(); + printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", + nilfs_btree_node_get_level(node), level); + return 1; + } + return 0; } -static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, +struct nilfs_btree_readahead_info { + struct nilfs_btree_node *node; /* parent node */ + int max_ra_blocks; /* max nof blocks to read ahead */ + int index; /* current index on the parent node */ + int ncmax; /* nof children in the parent node */ +}; + +static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp, + const struct nilfs_btree_readahead_info *ra) +{ + struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct buffer_head *bh, *ra_bh; + sector_t submit_ptr = 0; + int ret; + + ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); + if (ret) { + if (ret != -EEXIST) + return ret; + goto out_check; + } + + if (ra) { + int i, n; + __u64 ptr2; + + /* read ahead sibling nodes */ + for (n = ra->max_ra_blocks, i = ra->index + 1; + n > 0 && i < ra->ncmax; n--, i++) { + ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); + + ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, + &ra_bh, &submit_ptr); + if (likely(!ret || ret == -EEXIST)) + brelse(ra_bh); + else if (ret != -EBUSY) + break; + if (!buffer_locked(bh)) + goto out_no_wait; + } + } + + wait_on_buffer(bh); + + out_no_wait: + if (!buffer_uptodate(bh)) { + brelse(bh); + return -EIO; + } + + out_check: + if (nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + brelse(bh); + return -EINVAL; + } + + *bhp = bh; + return 0; +} + +static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp) +{ + return __nilfs_btree_get_block(btree, ptr, bhp, NULL); +} + +static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, - __u64 key, __u64 *ptrp, int minlevel) + __u64 key, __u64 *ptrp, int minlevel, + int readahead) { struct nilfs_btree_node *node; + struct nilfs_btree_readahead_info p, *ra; __u64 ptr; - int level, index, found, ret; + int level, index, found, ncmax, ret; node = nilfs_btree_get_root(btree); - level = nilfs_btree_node_get_level(btree, node); - if ((level < minlevel) || - (nilfs_btree_node_get_nchildren(btree, node) <= 0)) + level = nilfs_btree_node_get_level(node); + if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) return -ENOENT; - found = nilfs_btree_node_lookup(btree, node, key, &index); - ptr = nilfs_btree_node_get_ptr(btree, node, index); + found = nilfs_btree_node_lookup(node, key, &index); + ptr = nilfs_btree_node_get_ptr(node, index, + NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; - for (level--; level >= minlevel; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ncmax = nilfs_btree_nchildren_per_block(btree); + + while (--level >= minlevel) { + ra = NULL; + if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { + p.node = nilfs_btree_get_node(btree, path, level + 1, + &p.ncmax); + p.index = index; + p.max_ra_blocks = 7; + ra = &p; + } + ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, + ra); if (ret < 0) return ret; - node = nilfs_btree_get_nonroot_node(btree, path, level); - BUG_ON(level != nilfs_btree_node_get_level(btree, node)); + + node = nilfs_btree_get_nonroot_node(path, level); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; if (!found) - found = nilfs_btree_node_lookup(btree, node, key, - &index); + found = nilfs_btree_node_lookup(node, key, &index); else index = 0; - if (index < nilfs_btree_node_nchildren_max(btree, node)) - ptr = nilfs_btree_node_get_ptr(btree, node, index); - else { + if (index < ncmax) { + ptr = nilfs_btree_node_get_ptr(node, index, ncmax); + } else { WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; @@ -517,135 +564,207 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, return 0; } -static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, +static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; __u64 ptr; - int index, level, ret; + int index, level, ncmax, ret; node = nilfs_btree_get_root(btree); - index = nilfs_btree_node_get_nchildren(btree, node) - 1; + index = nilfs_btree_node_get_nchildren(node) - 1; if (index < 0) return -ENOENT; - level = nilfs_btree_node_get_level(btree, node); - ptr = nilfs_btree_node_get_ptr(btree, node, index); + level = nilfs_btree_node_get_level(node); + ptr = nilfs_btree_node_get_ptr(node, index, + NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; + ncmax = nilfs_btree_nchildren_per_block(btree); for (level--; level > 0; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; - node = nilfs_btree_get_nonroot_node(btree, path, level); - BUG_ON(level != nilfs_btree_node_get_level(btree, node)); - index = nilfs_btree_node_get_nchildren(btree, node) - 1; - ptr = nilfs_btree_node_get_ptr(btree, node, index); + node = nilfs_btree_get_nonroot_node(path, level); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; + index = nilfs_btree_node_get_nchildren(node) - 1; + ptr = nilfs_btree_node_get_ptr(node, index, ncmax); path[level].bp_index = index; } if (keyp != NULL) - *keyp = nilfs_btree_node_get_key(btree, node, index); + *keyp = nilfs_btree_node_get_key(node, index); if (ptrp != NULL) *ptrp = ptr; return 0; } -static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, +static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; - __u64 ptr; int ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); - ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); + ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); - if (ptrp != NULL) - *ptrp = ptr; + nilfs_btree_free_path(path); + + return ret; +} + +static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, + __u64 key, __u64 *ptrp, unsigned maxblocks) +{ + struct nilfs_btree_path *path; + struct nilfs_btree_node *node; + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int level = NILFS_BTREE_LEVEL_NODE_MIN; + int ret, cnt, index, maxlevel, ncmax; + struct nilfs_btree_readahead_info p; + + path = nilfs_btree_alloc_path(); + if (path == NULL) + return -ENOMEM; - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); + if (ret < 0) + goto out; + + if (NILFS_BMAP_USE_VBN(btree)) { + dat = nilfs_bmap_get_dat(btree); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + goto out; + ptr = blocknr; + } + cnt = 1; + if (cnt == maxblocks) + goto end; + + maxlevel = nilfs_btree_height(btree) - 1; + node = nilfs_btree_get_node(btree, path, level, &ncmax); + index = path[level].bp_index + 1; + for (;;) { + while (index < nilfs_btree_node_get_nchildren(node)) { + if (nilfs_btree_node_get_key(node, index) != + key + cnt) + goto end; + ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + goto out; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt || ++cnt == maxblocks) + goto end; + index++; + continue; + } + if (level == maxlevel) + break; + + /* look-up right sibling node */ + p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); + p.index = path[level + 1].bp_index + 1; + p.max_ra_blocks = 7; + if (p.index >= nilfs_btree_node_get_nchildren(p.node) || + nilfs_btree_node_get_key(p.node, p.index) != key + cnt) + break; + ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); + path[level + 1].bp_index = p.index; + + brelse(path[level].bp_bh); + path[level].bp_bh = NULL; + ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, + &p); + if (ret < 0) + goto out; + node = nilfs_btree_get_nonroot_node(path, level); + ncmax = nilfs_btree_nchildren_per_block(btree); + index = 0; + path[level].bp_index = index; + } + end: + *ptrp = ptr; + ret = cnt; + out: + nilfs_btree_free_path(path); return ret; } -static void nilfs_btree_promote_key(struct nilfs_btree *btree, +static void nilfs_btree_promote_key(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 key) { if (level < nilfs_btree_height(btree) - 1) { do { - lock_buffer(path[level].bp_bh); nilfs_btree_node_set_key( - btree, - nilfs_btree_get_nonroot_node( - btree, path, level), + nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } /* root */ if (level == nilfs_btree_height(btree) - 1) { - nilfs_btree_node_set_key(btree, - nilfs_btree_get_root(btree), + nilfs_btree_node_set_key(nilfs_btree_get_root(btree), path[level].bp_index, key); } } -static void nilfs_btree_do_insert(struct nilfs_btree *btree, +static void nilfs_btree_do_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; + int ncblk; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); - node = nilfs_btree_get_nonroot_node(btree, path, level); - nilfs_btree_node_insert(btree, node, *keyp, *ptrp, - path[level].bp_index); + node = nilfs_btree_get_nonroot_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_insert(node, path[level].bp_index, + *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key( - btree, node, 0)); + nilfs_btree_node_get_key(node, + 0)); } else { node = nilfs_btree_get_root(btree); - nilfs_btree_node_insert(btree, node, *keyp, *ptrp, - path[level].bp_index); + nilfs_btree_node_insert(node, path[level].bp_index, + *keyp, *ptrp, + NILFS_BTREE_ROOT_NCHILDREN_MAX); } } -static void nilfs_btree_carry_left(struct nilfs_btree *btree, +static void nilfs_btree_carry_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int nchildren, lnchildren, n, move; - - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); + int nchildren, lnchildren, n, move, ncblk; - node = nilfs_btree_get_nonroot_node(btree, path, level); - left = nilfs_btree_get_sib_node(btree, path, level); - nchildren = nilfs_btree_node_get_nchildren(btree, node); - lnchildren = nilfs_btree_node_get_nchildren(btree, left); + node = nilfs_btree_get_nonroot_node(path, level); + left = nilfs_btree_get_sib_node(path, level); + nchildren = nilfs_btree_node_get_nchildren(node); + lnchildren = nilfs_btree_node_get_nchildren(left); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + lnchildren + 1) / 2 - lnchildren; @@ -655,27 +774,24 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_left(btree, left, node, n); + nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key(btree, node, 0)); + nilfs_btree_node_get_key(node, 0)); if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } @@ -683,20 +799,18 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } -static void nilfs_btree_carry_right(struct nilfs_btree *btree, +static void nilfs_btree_carry_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int nchildren, rnchildren, n, move; + int nchildren, rnchildren, n, move, ncblk; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - - node = nilfs_btree_get_nonroot_node(btree, path, level); - right = nilfs_btree_get_sib_node(btree, path, level); - nchildren = nilfs_btree_node_get_nchildren(btree, node); - rnchildren = nilfs_btree_node_get_nchildren(btree, right); + node = nilfs_btree_get_nonroot_node(path, level); + right = nilfs_btree_get_sib_node(path, level); + nchildren = nilfs_btree_node_get_nchildren(node); + rnchildren = nilfs_btree_node_get_nchildren(right); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + rnchildren + 1) / 2 - rnchildren; @@ -706,51 +820,45 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_right(btree, node, right, n); + nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key(btree, right, 0)); + nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; - path[level].bp_index -= - nilfs_btree_node_get_nchildren(btree, node); + path[level].bp_index -= nilfs_btree_node_get_nchildren(node); path[level + 1].bp_index++; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } -static void nilfs_btree_split(struct nilfs_btree *btree, +static void nilfs_btree_split(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; __u64 newkey; __u64 newptr; - int nchildren, n, move; + int nchildren, n, move, ncblk; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - - node = nilfs_btree_get_nonroot_node(btree, path, level); - right = nilfs_btree_get_sib_node(btree, path, level); - nchildren = nilfs_btree_node_get_nchildren(btree, node); + node = nilfs_btree_get_nonroot_node(path, level); + right = nilfs_btree_get_sib_node(path, level); + nchildren = nilfs_btree_node_get_nchildren(node); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + 1) / 2; @@ -759,80 +867,74 @@ static void nilfs_btree_split(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_right(btree, node, right, n); + nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(btree, right, 0); + newkey = nilfs_btree_node_get_key(right, 0); newptr = path[level].bp_newreq.bpr_ptr; if (move) { - path[level].bp_index -= - nilfs_btree_node_get_nchildren(btree, node); - nilfs_btree_node_insert(btree, right, *keyp, *ptrp, - path[level].bp_index); + path[level].bp_index -= nilfs_btree_node_get_nchildren(node); + nilfs_btree_node_insert(right, path[level].bp_index, + *keyp, *ptrp, ncblk); - *keyp = nilfs_btree_node_get_key(btree, right, 0); + *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { nilfs_btree_do_insert(btree, path, level, keyp, ptrp); - *keyp = nilfs_btree_node_get_key(btree, right, 0); + *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } path[level + 1].bp_index++; } -static void nilfs_btree_grow(struct nilfs_btree *btree, +static void nilfs_btree_grow(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; - int n; - - lock_buffer(path[level].bp_sib_bh); + int n, ncblk; root = nilfs_btree_get_root(btree); - child = nilfs_btree_get_sib_node(btree, path, level); + child = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); - n = nilfs_btree_node_get_nchildren(btree, root); + n = nilfs_btree_node_get_nchildren(root); - nilfs_btree_node_move_right(btree, root, child, n); - nilfs_btree_node_set_level(btree, root, level + 1); + nilfs_btree_node_move_right(root, child, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); + nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; nilfs_btree_do_insert(btree, path, level, keyp, ptrp); - *keyp = nilfs_btree_node_get_key(btree, child, 0); + *keyp = nilfs_btree_node_get_key(child, 0); *ptrp = path[level].bp_newreq.bpr_ptr; } -static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, +static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path) { struct nilfs_btree_node *node; - int level; + int level, ncmax; if (path == NULL) return NILFS_BMAP_INVALID_PTR; @@ -840,29 +942,30 @@ static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, /* left sibling */ level = NILFS_BTREE_LEVEL_NODE_MIN; if (path[level].bp_index > 0) { - node = nilfs_btree_get_node(btree, path, level); - return nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index - 1); + node = nilfs_btree_get_node(btree, path, level, &ncmax); + return nilfs_btree_node_get_ptr(node, + path[level].bp_index - 1, + ncmax); } /* parent */ level = NILFS_BTREE_LEVEL_NODE_MIN + 1; if (level <= nilfs_btree_height(btree) - 1) { - node = nilfs_btree_get_node(btree, path, level); - return nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index); + node = nilfs_btree_get_node(btree, path, level, &ncmax); + return nilfs_btree_node_get_ptr(node, path[level].bp_index, + ncmax); } return NILFS_BMAP_INVALID_PTR; } -static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, +static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, __u64 key) { __u64 ptr; - ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); + ptr = nilfs_bmap_find_target_seq(btree, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; @@ -873,17 +976,10 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, return ptr; } /* block group */ - return nilfs_bmap_find_target_in_group(&btree->bt_bmap); + return nilfs_bmap_find_target_in_group(btree); } -static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, - __u64 ptr) -{ - btree->bt_bmap.b_last_allocated_key = key; - btree->bt_bmap.b_last_allocated_ptr = ptr; -} - -static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, +static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, __u64 key, __u64 ptr, struct nilfs_bmap_stats *stats) @@ -891,102 +987,99 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; - int pindex, level, ret; + int pindex, level, ncmax, ncblk, ret; + struct inode *dat = NULL; stats->bs_nblocks = 0; level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ - if (btree->bt_ops->btop_find_target != NULL) + if (NILFS_BMAP_USE_VBN(btree)) { path[level].bp_newreq.bpr_ptr = - btree->bt_ops->btop_find_target(btree, path, key); + nilfs_btree_find_target_v(btree, path, key); + dat = nilfs_bmap_get_dat(btree); + } - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_data; + ncblk = nilfs_btree_nchildren_per_block(btree); + for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < nilfs_btree_height(btree) - 1; level++) { - node = nilfs_btree_get_nonroot_node(btree, path, level); - if (nilfs_btree_node_get_nchildren(btree, node) < - nilfs_btree_node_nchildren_max(btree, node)) { + node = nilfs_btree_get_nonroot_node(path, level); + if (nilfs_btree_node_get_nchildren(node) < ncblk) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; /* left sibling */ if (pindex > 0) { - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, + ncmax); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(btree, sib) < - nilfs_btree_node_nchildren_max(btree, sib)) { + if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_left; stats->bs_nblocks++; goto out; - } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + } else { + brelse(bh); + } } /* right sibling */ - if (pindex < - nilfs_btree_node_get_nchildren(btree, parent) - 1) { - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { + sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, + ncmax); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(btree, sib) < - nilfs_btree_node_nchildren_max(btree, sib)) { + if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_right; stats->bs_nblocks++; goto out; - } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + } else { + brelse(bh); + } } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(btree, + &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, - &bh); + ret = nilfs_btree_get_new_block(btree, + path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; stats->bs_nblocks++; - lock_buffer(bh); - nilfs_btree_node_init(btree, - (struct nilfs_btree_node *)bh->b_data, - 0, level, 0, NULL, NULL); - unlock_buffer(bh); + sib = (struct nilfs_btree_node *)bh->b_data; + nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } /* root */ node = nilfs_btree_get_root(btree); - if (nilfs_btree_node_get_nchildren(btree, node) < - nilfs_btree_node_nchildren_max(btree, node)) { + if (nilfs_btree_node_get_nchildren(node) < + NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; @@ -994,19 +1087,16 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; - lock_buffer(bh); - nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, - 0, level, 0, NULL, NULL); - unlock_buffer(bh); + nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, + 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; @@ -1023,62 +1113,57 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* error */ err_out_curr_node: - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + nilfs_btnode_delete(path[level].bp_sib_bh); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); } - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_data: *levelp = level; stats->bs_nblocks = 0; return ret; } -static void nilfs_btree_commit_insert(struct nilfs_btree *btree, +static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, __u64 key, __u64 ptr) { + struct inode *dat = NULL; int level; set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, ptr); + if (NILFS_BMAP_USE_VBN(btree)) { + nilfs_bmap_set_target_v(btree, key, ptr); + dat = nilfs_bmap_get_dat(btree); + } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { - btree->bt_bmap.b_pops->bpop_commit_alloc_ptr( - &btree->bt_bmap, &path[level - 1].bp_newreq); - } + nilfs_bmap_commit_alloc_ptr(btree, + &path[level - 1].bp_newreq, dat); path[level].bp_op(btree, path, level, &key, &ptr); } - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } -static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) +static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; int level, ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); ret = nilfs_btree_do_lookup(btree, path, key, NULL, - NILFS_BTREE_LEVEL_NODE_MIN); + NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret != -ENOENT) { if (ret == 0) ret = -EEXIST; @@ -1089,245 +1174,230 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (ret < 0) goto out; nilfs_btree_commit_insert(btree, path, level, key, ptr); - nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); out: - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } -static void nilfs_btree_do_delete(struct nilfs_btree *btree, +static void nilfs_btree_do_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; + int ncblk; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); - node = nilfs_btree_get_nonroot_node(btree, path, level); - nilfs_btree_node_delete(btree, node, keyp, ptrp, - path[level].bp_index); + node = nilfs_btree_get_nonroot_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_delete(node, path[level].bp_index, + keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key(btree, node, 0)); + nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); - nilfs_btree_node_delete(btree, node, keyp, ptrp, - path[level].bp_index); + nilfs_btree_node_delete(node, path[level].bp_index, + keyp, ptrp, + NILFS_BTREE_ROOT_NCHILDREN_MAX); } } -static void nilfs_btree_borrow_left(struct nilfs_btree *btree, +static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int nchildren, lnchildren, n; + int nchildren, lnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - - node = nilfs_btree_get_nonroot_node(btree, path, level); - left = nilfs_btree_get_sib_node(btree, path, level); - nchildren = nilfs_btree_node_get_nchildren(btree, node); - lnchildren = nilfs_btree_node_get_nchildren(btree, left); + node = nilfs_btree_get_nonroot_node(path, level); + left = nilfs_btree_get_sib_node(path, level); + nchildren = nilfs_btree_node_get_nchildren(node); + lnchildren = nilfs_btree_node_get_nchildren(left); + ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + lnchildren) / 2 - nchildren; - nilfs_btree_node_move_right(btree, left, node, n); + nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key(btree, node, 0)); + nilfs_btree_node_get_key(node, 0)); - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index += n; } -static void nilfs_btree_borrow_right(struct nilfs_btree *btree, +static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int nchildren, rnchildren, n; + int nchildren, rnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - - node = nilfs_btree_get_nonroot_node(btree, path, level); - right = nilfs_btree_get_sib_node(btree, path, level); - nchildren = nilfs_btree_node_get_nchildren(btree, node); - rnchildren = nilfs_btree_node_get_nchildren(btree, right); + node = nilfs_btree_get_nonroot_node(path, level); + right = nilfs_btree_get_sib_node(path, level); + nchildren = nilfs_btree_node_get_nchildren(node); + rnchildren = nilfs_btree_node_get_nchildren(right); + ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + rnchildren) / 2 - nchildren; - nilfs_btree_node_move_left(btree, node, right, n); + nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, - nilfs_btree_node_get_key(btree, right, 0)); + nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } -static void nilfs_btree_concat_left(struct nilfs_btree *btree, +static void nilfs_btree_concat_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); + node = nilfs_btree_get_nonroot_node(path, level); + left = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); - node = nilfs_btree_get_nonroot_node(btree, path, level); - left = nilfs_btree_get_sib_node(btree, path, level); + n = nilfs_btree_node_get_nchildren(node); - n = nilfs_btree_node_get_nchildren(btree, node); - - nilfs_btree_node_move_left(btree, left, node, n); + nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; - path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); + path[level].bp_index += nilfs_btree_node_get_nchildren(left); } -static void nilfs_btree_concat_right(struct nilfs_btree *btree, +static void nilfs_btree_concat_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); + node = nilfs_btree_get_nonroot_node(path, level); + right = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); - node = nilfs_btree_get_nonroot_node(btree, path, level); - right = nilfs_btree_get_sib_node(btree, path, level); + n = nilfs_btree_node_get_nchildren(right); - n = nilfs_btree_node_get_nchildren(btree, right); - - nilfs_btree_node_move_left(btree, node, right, n); + nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); - - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); + nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; } -static void nilfs_btree_shrink(struct nilfs_btree *btree, +static void nilfs_btree_shrink(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); root = nilfs_btree_get_root(btree); - child = nilfs_btree_get_nonroot_node(btree, path, level); + child = nilfs_btree_get_nonroot_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); - nilfs_btree_node_delete(btree, root, NULL, NULL, 0); - nilfs_btree_node_set_level(btree, root, level); - n = nilfs_btree_node_get_nchildren(btree, child); - nilfs_btree_node_move_left(btree, root, child, n); - unlock_buffer(path[level].bp_bh); + nilfs_btree_node_delete(root, 0, NULL, NULL, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + nilfs_btree_node_set_level(root, level); + n = nilfs_btree_node_get_nchildren(child); + nilfs_btree_node_move_left(root, child, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } +static void nilfs_btree_nop(struct nilfs_bmap *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ +} -static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, +static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, - struct nilfs_bmap_stats *stats) + struct nilfs_bmap_stats *stats, + struct inode *dat) { struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; - int pindex, level, ret; + int pindex, dindex, level, ncmin, ncmax, ncblk, ret; ret = 0; stats->bs_nblocks = 0; - for (level = NILFS_BTREE_LEVEL_NODE_MIN; + ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); + ncblk = nilfs_btree_nchildren_per_block(btree); + + for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; level < nilfs_btree_height(btree) - 1; level++) { - node = nilfs_btree_get_nonroot_node(btree, path, level); + node = nilfs_btree_get_nonroot_node(path, level); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } + nilfs_btree_node_get_ptr(node, dindex, ncblk); + ret = nilfs_bmap_prepare_end_ptr(btree, + &path[level].bp_oldreq, dat); + if (ret < 0) + goto err_out_child_node; - if (nilfs_btree_node_get_nchildren(btree, node) > - nilfs_btree_node_nchildren_min(btree, node)) { + if (nilfs_btree_node_get_nchildren(node) > ncmin) { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; + dindex = pindex; if (pindex > 0) { /* left sibling */ - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, + ncmax); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(btree, sib) > - nilfs_btree_node_nchildren_min(btree, sib)) { + if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_left; stats->bs_nblocks++; @@ -1339,17 +1409,15 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* continue; */ } } else if (pindex < - nilfs_btree_node_get_nchildren(btree, parent) - 1) { + nilfs_btree_node_get_nchildren(parent) - 1) { /* right sibling */ - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, + ncmax); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(btree, sib) > - nilfs_btree_node_nchildren_min(btree, sib)) { + if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_right; stats->bs_nblocks++; @@ -1358,39 +1426,49 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_right; stats->bs_nblocks++; + /* + * When merging right sibling node + * into the current node, pointer to + * the right sibling node must be + * terminated instead. The adjustment + * below is required for that. + */ + dindex = pindex + 1; /* continue; */ } } else { /* no siblings */ /* the only child of the root node */ WARN_ON(level != nilfs_btree_height(btree) - 2); - if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= + if (nilfs_btree_node_get_nchildren(node) - 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; stats->bs_nblocks += 2; + level++; + path[level].bp_op = nilfs_btree_nop; + goto shrink_root_child; } else { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; + goto out; } - - goto out; - } } - node = nilfs_btree_get_root(btree); - path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } /* child of the root node is deleted */ path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; +shrink_root_child: + node = nilfs_btree_get_root(btree); + path[level].bp_oldreq.bpr_ptr = + nilfs_btree_node_get_ptr(node, dindex, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + + ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); + if (ret < 0) + goto err_out_child_node; + /* success */ out: *levelp = level; @@ -1398,98 +1476,87 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* error */ err_out_curr_node: - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + brelse(path[level].bp_sib_bh); + nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); } *levelp = level; stats->bs_nblocks = 0; return ret; } -static void nilfs_btree_commit_delete(struct nilfs_btree *btree, +static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int maxlevel) + int maxlevel, struct inode *dat) { int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_commit_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); path[level].bp_op(btree, path, level, NULL, NULL); } - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } -static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) +static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; + struct inode *dat; int level, ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, - NILFS_BTREE_LEVEL_NODE_MIN); + NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret < 0) goto out; - ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); + + dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; + + ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); if (ret < 0) goto out; - nilfs_btree_commit_delete(btree, path, level); - nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); + nilfs_btree_commit_delete(btree, path, level, dat); + nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); out: - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; int ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) +static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_node *root, *node; __u64 maxkey, nextmaxkey; __u64 ptr; int nchildren, ret; - btree = (struct nilfs_btree *)bmap; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: @@ -1497,11 +1564,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(btree, root); + nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; - ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; @@ -1510,84 +1578,86 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) return 0; } - nchildren = nilfs_btree_node_get_nchildren(btree, node); - maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1); + nchildren = nilfs_btree_node_get_nchildren(node); + maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? - nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; + nilfs_btree_node_get_key(node, nchildren - 2) : 0; if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); - return (maxkey == key) && (nextmaxkey < bmap->b_low); + return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } -static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, +static int nilfs_btree_gather_data(struct nilfs_bmap *btree, __u64 *keys, __u64 *ptrs, int nitems) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_node *node, *root; __le64 *dkeys; __le64 *dptrs; __u64 ptr; - int nchildren, i, ret; + int nchildren, ncmax, i, ret; - btree = (struct nilfs_btree *)bmap; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; + ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(btree, root); + nchildren = nilfs_btree_node_get_nchildren(root); WARN_ON(nchildren > 1); - ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + ncmax = nilfs_btree_nchildren_per_block(btree); break; default: node = NULL; return -EINVAL; } - nchildren = nilfs_btree_node_get_nchildren(btree, node); + nchildren = nilfs_btree_node_get_nchildren(node); if (nchildren < nitems) nitems = nchildren; - dkeys = nilfs_btree_node_dkeys(btree, node); - dptrs = nilfs_btree_node_dptrs(btree, node); + dkeys = nilfs_btree_node_dkeys(node); + dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nitems; i++) { - keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); - ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); + keys[i] = le64_to_cpu(dkeys[i]); + ptrs[i] = le64_to_cpu(dptrs[i]); } if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); return nitems; } static int -nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, +nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head **bhp, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; - struct nilfs_btree *btree; + struct inode *dat = NULL; int ret; - btree = (struct nilfs_btree *)bmap; stats->bs_nblocks = 0; /* for data */ /* cannot find near ptr */ - if (btree->bt_ops->btop_find_target != NULL) - dreq->bpr_ptr - = btree->bt_ops->btop_find_target(btree, NULL, key); - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq); + if (NILFS_BMAP_USE_VBN(btree)) { + dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); + dat = nilfs_bmap_get_dat(btree); + } + + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1595,11 +1665,11 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq); + ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); if (ret < 0) goto err_out_dreq; - ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); if (ret < 0) goto err_out_nreq; @@ -1612,78 +1682,76 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* error */ err_out_nreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq); + nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); err_out_dreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq); + nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); stats->bs_nblocks = 0; return ret; } static void -nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, +nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high, + int n, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) { - struct nilfs_btree *btree; struct nilfs_btree_node *node; + struct inode *dat; __u64 tmpptr; + int ncblk; /* free resources */ - if (bmap->b_ops->bop_clear != NULL) - bmap->b_ops->bop_clear(bmap); + if (btree->b_ops->bop_clear != NULL) + btree->b_ops->bop_clear(btree); /* ptr must be a pointer to a buffer head. */ set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); /* convert and insert */ - btree = (struct nilfs_btree *)bmap; - nilfs_btree_init(bmap, low, high); + dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; + nilfs_btree_init(btree); if (nreq != NULL) { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq); - } + nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); + nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); /* create child node at level 1 */ - lock_buffer(bh); node = (struct nilfs_btree_node *)bh->b_data; - nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); - nilfs_btree_node_insert(btree, node, - key, dreq->bpr_ptr, n); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); + nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); - if (!nilfs_bmap_dirty(bmap)) - nilfs_bmap_set_dirty(bmap); + mark_buffer_dirty(bh); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); - unlock_buffer(bh); - nilfs_bmap_put_block(bmap, bh); + brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); tmpptr = nreq->bpr_ptr; - nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, - 2, 1, &keys[0], &tmpptr); + nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX, + &keys[0], &tmpptr); } else { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); + nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); - nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, - 1, n, keys, ptrs); - nilfs_btree_node_insert(btree, node, - key, dreq->bpr_ptr, n); - if (!nilfs_bmap_dirty(bmap)) - nilfs_bmap_set_dirty(bmap); + nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, + keys, ptrs); + nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr); + if (NILFS_BMAP_USE_VBN(btree)) + nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); } /** @@ -1694,13 +1762,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, * @keys: * @ptrs: * @n: - * @low: - * @high: */ -int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, +int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, - const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high) + const __u64 *keys, const __u64 *ptrs, int n) { struct buffer_head *bh; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; @@ -1711,7 +1776,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( - 1 << bmap->b_inode->i_blkbits)) { + 1 << btree->b_inode->i_blkbits)) { di = &dreq; ni = &nreq; } else { @@ -1720,43 +1785,42 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, BUG(); } - ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, + ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, &stats); if (ret < 0) return ret; - nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, - low, high, di, ni, bh); - nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, + di, ni, bh); + nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); return 0; } -static int nilfs_btree_propagate_p(struct nilfs_btree *btree, +static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); return 0; } -static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, +static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int level) + int level, struct inode *dat) { struct nilfs_btree_node *parent; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; - ret = nilfs_bmap_prepare_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, + &path[level].bp_newreq.bpr_req); if (ret < 0) return ret; @@ -1765,12 +1829,12 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_dat_abort_update(dat, + &path[level].bp_oldreq.bpr_req, + &path[level].bp_newreq.bpr_req); return ret; } } @@ -1778,52 +1842,52 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, return 0; } -static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, +static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int level) + int level, struct inode *dat) { struct nilfs_btree_node *parent; + int ncmax; - nilfs_bmap_commit_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, + &path[level].bp_newreq.bpr_req, + btree->b_ptr_type == NILFS_BMAP_PTR_VS); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } set_buffer_nilfs_volatile(path[level].bp_bh); - parent = nilfs_btree_get_node(btree, path, level + 1); - nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index, - path[level].bp_newreq.bpr_ptr); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, + path[level].bp_newreq.bpr_ptr, ncmax); } -static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, +static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int level) + int level, struct inode *dat) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, + &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); } -static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, +static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int minlevel, - int *maxlevelp) + int minlevel, int *maxlevelp, + struct inode *dat) { int level, ret; level = minlevel; if (!buffer_nilfs_volatile(path[level].bp_bh)) { - ret = nilfs_btree_prepare_update_v(btree, path, level); + ret = nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) return ret; } @@ -1831,7 +1895,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, !buffer_dirty(path[level].bp_bh)) { WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); - ret = nilfs_btree_prepare_update_v(btree, path, level); + ret = nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) goto out; } @@ -1843,52 +1907,55 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, /* error */ out: while (--level > minlevel) - nilfs_btree_abort_update_v(btree, path, level); + nilfs_btree_abort_update_v(btree, path, level, dat); if (!buffer_nilfs_volatile(path[level].bp_bh)) - nilfs_btree_abort_update_v(btree, path, level); + nilfs_btree_abort_update_v(btree, path, level, dat); return ret; } -static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, +static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int minlevel, - int maxlevel, - struct buffer_head *bh) + int minlevel, int maxlevel, + struct buffer_head *bh, + struct inode *dat) { int level; if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) - nilfs_btree_commit_update_v(btree, path, minlevel); + nilfs_btree_commit_update_v(btree, path, minlevel, dat); for (level = minlevel + 1; level <= maxlevel; level++) - nilfs_btree_commit_update_v(btree, path, level); + nilfs_btree_commit_update_v(btree, path, level, dat); } -static int nilfs_btree_propagate_v(struct nilfs_btree *btree, +static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, - int level, - struct buffer_head *bh) + int level, struct buffer_head *bh) { - int maxlevel, ret; + int maxlevel = 0, ret; struct nilfs_btree_node *parent; + struct inode *dat = nilfs_bmap_get_dat(btree); __u64 ptr; + int ncmax; get_bh(bh); path[level].bp_bh = bh; - ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel); + ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, + dat); if (ret < 0) goto out; if (buffer_nilfs_volatile(path[level].bp_bh)) { - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); - ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, + path[level + 1].bp_index, + ncmax); + ret = nilfs_dat_mark_dirty(dat, ptr); if (ret < 0) goto out; } - nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh); + nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); out: brelse(path[level].bp_bh); @@ -1896,10 +1963,9 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, return ret; } -static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, +static int nilfs_btree_propagate(struct nilfs_bmap *btree, struct buffer_head *bh) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; @@ -1907,22 +1973,20 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, WARN_ON(!buffer_dirty(bh)); - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); if (buffer_nilfs_node(bh)) { node = (struct nilfs_btree_node *)bh->b_data; - key = nilfs_btree_node_get_key(btree, node, 0); - level = nilfs_btree_node_get_level(btree, node); + key = nilfs_btree_node_get_key(node, 0); + level = nilfs_btree_node_get_level(node); } else { - key = nilfs_bmap_data_get_key(bmap, bh); + key = nilfs_bmap_data_get_key(btree, bh); level = NILFS_BTREE_LEVEL_DATA; } - ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) printk(KERN_CRIT "%s: key = %llu, level == %d\n", @@ -1930,22 +1994,23 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, goto out; } - ret = btree->bt_ops->btop_propagate(btree, path, level, bh); + ret = NILFS_BMAP_USE_VBN(btree) ? + nilfs_btree_propagate_v(btree, path, level, bh) : + nilfs_btree_propagate_p(btree, path, level, bh); out: - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, +static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, struct buffer_head *bh) { - return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr); + return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); } -static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, +static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, struct list_head *lists, struct buffer_head *bh) { @@ -1957,23 +2022,34 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, get_bh(bh); node = (struct nilfs_btree_node *)bh->b_data; - key = nilfs_btree_node_get_key(btree, node, 0); - level = nilfs_btree_node_get_level(btree, node); + key = nilfs_btree_node_get_key(node, 0); + level = nilfs_btree_node_get_level(node); + if (level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX) { + dump_stack(); + printk(KERN_WARNING + "%s: invalid btree level: %d (key=%llu, ino=%lu, " + "blocknr=%llu)\n", + __func__, level, (unsigned long long)key, + NILFS_BMAP_I(btree)->vfs_inode.i_ino, + (unsigned long long)bh->b_blocknr); + return; + } + list_for_each(head, &lists[level]) { cbh = list_entry(head, struct buffer_head, b_assoc_buffers); cnode = (struct nilfs_btree_node *)cbh->b_data; - ckey = nilfs_btree_node_get_key(btree, cnode, 0); + ckey = nilfs_btree_node_get_key(cnode, 0); if (key < ckey) break; } list_add_tail(&bh->b_assoc_buffers, head); } -static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, +static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct nilfs_btree *btree = (struct nilfs_btree *)bmap; - struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache; + struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -2004,10 +2080,10 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) - list_splice(&lists[level], listp->prev); + list_splice_tail(&lists[level], listp); } -static int nilfs_btree_assign_p(struct nilfs_btree *btree, +static int nilfs_btree_assign_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, @@ -2017,39 +2093,38 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree, struct nilfs_btree_node *parent; __u64 key; __u64 ptr; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); if (buffer_nilfs_node(*bh)) { path[level].bp_ctxt.oldkey = ptr; path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } - nilfs_btree_node_set_ptr(btree, parent, - path[level + 1].bp_index, blocknr); + nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, + ncmax); - key = nilfs_btree_node_get_key(btree, parent, - path[level + 1].bp_index); + key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ - binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; return 0; } -static int nilfs_btree_assign_v(struct nilfs_btree *btree, +static int nilfs_btree_assign_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, @@ -2057,140 +2132,131 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; + struct inode *dat = nilfs_bmap_get_dat(btree); __u64 key; __u64 ptr; union nilfs_bmap_ptr_req req; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); req.bpr_ptr = ptr; - ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap, - &req); + ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (ret < 0) return ret; - btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap, - &req, blocknr); + nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); - key = nilfs_btree_node_get_key(btree, parent, - path[level + 1].bp_index); + key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ - binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } -static int nilfs_btree_assign(struct nilfs_bmap *bmap, +static int nilfs_btree_assign(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; - key = nilfs_btree_node_get_key(btree, node, 0); - level = nilfs_btree_node_get_level(btree, node); + key = nilfs_btree_node_get_key(node, 0); + level = nilfs_btree_node_get_level(node); } else { - key = nilfs_bmap_data_get_key(bmap, *bh); + key = nilfs_bmap_data_get_key(btree, *bh); level = NILFS_BTREE_LEVEL_DATA; } - ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } - ret = btree->bt_ops->btop_assign(btree, path, level, bh, - blocknr, binfo); + ret = NILFS_BMAP_USE_VBN(btree) ? + nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : + nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); out: - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, +static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { - struct nilfs_btree *btree; struct nilfs_btree_node *node; __u64 key; int ret; - btree = (struct nilfs_btree *)bmap; - ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr); + ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, + blocknr); if (ret < 0) return ret; if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; - key = nilfs_btree_node_get_key(btree, node, 0); + key = nilfs_btree_node_get_key(node, 0); } else - key = nilfs_bmap_data_get_key(bmap, *bh); + key = nilfs_bmap_data_get_key(btree, *bh); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } -static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) +static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_path *path; __u64 ptr; int ret; - btree = (struct nilfs_btree *)bmap; - path = nilfs_btree_alloc_path(btree); + path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - nilfs_btree_init_path(btree, path); - ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); - nilfs_bmap_put_block(&btree->bt_bmap, bh); - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + mark_buffer_dirty(bh); + brelse(bh); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); out: - nilfs_btree_clear_path(btree, path); - nilfs_btree_free_path(btree, path); + nilfs_btree_free_path(path); return ret; } static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, + .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, @@ -2210,6 +2276,7 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = { static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, + .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, @@ -2227,43 +2294,17 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -static const struct nilfs_btree_operations nilfs_btree_ops_v = { - .btop_find_target = nilfs_btree_find_target_v, - .btop_set_target = nilfs_btree_set_target_v, - .btop_propagate = nilfs_btree_propagate_v, - .btop_assign = nilfs_btree_assign_v, -}; - -static const struct nilfs_btree_operations nilfs_btree_ops_p = { - .btop_find_target = NULL, - .btop_set_target = NULL, - .btop_propagate = nilfs_btree_propagate_p, - .btop_assign = nilfs_btree_assign_p, -}; - -int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_btree_init(struct nilfs_bmap *bmap) { - struct nilfs_btree *btree; - - btree = (struct nilfs_btree *)bmap; bmap->b_ops = &nilfs_btree_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - btree->bt_ops = &nilfs_btree_ops_p; - break; - default: - btree->bt_ops = &nilfs_btree_ops_v; - break; - } - + bmap->b_nchildren_per_block = + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); return 0; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { - bmap->b_low = NILFS_BMAP_LARGE_LOW; - bmap->b_high = NILFS_BMAP_LARGE_HIGH; bmap->b_ops = &nilfs_btree_ops_gc; + bmap->b_nchildren_per_block = + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 4766deb52fb..22c02e35b6e 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -30,66 +30,26 @@ #include "btnode.h" #include "bmap.h" -struct nilfs_btree; -struct nilfs_btree_path; - -/** - * struct nilfs_btree_operations - B-tree operation table - */ -struct nilfs_btree_operations { - __u64 (*btop_find_target)(const struct nilfs_btree *, - const struct nilfs_btree_path *, __u64); - void (*btop_set_target)(struct nilfs_btree *, __u64, __u64); - - struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *); - - int (*btop_propagate)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head *); - int (*btop_assign)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head **, - sector_t, - union nilfs_binfo *); -}; - -/** - * struct nilfs_btree_node - B-tree node - * @bn_flags: flags - * @bn_level: level - * @bn_nchildren: number of children - * @bn_pad: padding - */ -struct nilfs_btree_node { - __u8 bn_flags; - __u8 bn_level; - __le16 bn_nchildren; - __le32 bn_pad; -}; - -/* flags */ -#define NILFS_BTREE_NODE_ROOT 0x01 - -/* level */ -#define NILFS_BTREE_LEVEL_DATA 0 -#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 - /** - * struct nilfs_btree - B-tree structure - * @bt_bmap: bmap base structure - * @bt_ops: B-tree operation table + * struct nilfs_btree_path - A path on which B-tree operations are executed + * @bp_bh: buffer head of node block + * @bp_sib_bh: buffer head of sibling node block + * @bp_index: index of child node + * @bp_oldreq: ptr end request for old ptr + * @bp_newreq: ptr alloc request for new ptr + * @bp_op: rebalance operation */ -struct nilfs_btree { - struct nilfs_bmap bt_bmap; - - /* B-tree-specific members */ - const struct nilfs_btree_operations *bt_ops; +struct nilfs_btree_path { + struct buffer_head *bp_bh; + struct buffer_head *bp_sib_bh; + int bp_index; + union nilfs_bmap_ptr_req bp_oldreq; + union nilfs_bmap_ptr_req bp_newreq; + struct nilfs_btnode_chkey_ctxt bp_ctxt; + void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *, + int, __u64 *, __u64 *); }; - #define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE #define NILFS_BTREE_ROOT_NCHILDREN_MAX \ ((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) / \ @@ -105,13 +65,13 @@ struct nilfs_btree { #define NILFS_BTREE_KEY_MIN ((__u64)0) #define NILFS_BTREE_KEY_MAX (~(__u64)0) +extern struct kmem_cache *nilfs_btree_path_cache; -int nilfs_btree_path_cache_init(void); -void nilfs_btree_path_cache_destroy(void); -int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_btree_init(struct nilfs_bmap *); int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, - const __u64 *, const __u64 *, - int, __u64, __u64); + const __u64 *, const __u64 *, int); void nilfs_btree_init_gc(struct nilfs_bmap *); +int nilfs_btree_broken_node_block(struct buffer_head *bh); + #endif /* _NILFS_BTREE_H */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 300f1cdfa86..0d58075f34e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, kaddr, 1); - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + kunmap_atomic(kaddr); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); } @@ -286,7 +286,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 cno; void *kaddr; unsigned long tnicps; - int ret, ncps, nicps, count, i; + int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { printk(KERN_ERR "%s: invalid range of checkpoint numbers: " @@ -295,75 +295,79 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, return -EINVAL; } - /* cannot delete the latest checkpoint */ - if (start == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; - down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) goto out_sem; tnicps = 0; + nss = 0; for (cno = start; cno < end; cno += ncps) { ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_header; + break; /* skip hole */ ret = 0; continue; } - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint( cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - WARN_ON(nilfs_checkpoint_snapshot(cp)); - if (!nilfs_checkpoint_invalid(cp)) { + if (nilfs_checkpoint_snapshot(cp)) { + nss++; + } else if (!nilfs_checkpoint_invalid(cp)) { nilfs_checkpoint_set_invalid(cp); nicps++; } } if (nicps > 0) { tnicps += nicps; - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); nilfs_mdt_mark_dirty(cpfile); - if (!nilfs_cpfile_is_in_first(cpfile, cno) && - (count = nilfs_cpfile_block_sub_valid_checkpoints( - cpfile, cp_bh, kaddr, nicps)) == 0) { - /* make hole */ - kunmap_atomic(kaddr, KM_USER0); - brelse(cp_bh); - ret = nilfs_cpfile_delete_checkpoint_block( - cpfile, cno); - if (ret == 0) - continue; - printk(KERN_ERR "%s: cannot delete block\n", - __func__); - goto out_header; + if (!nilfs_cpfile_is_in_first(cpfile, cno)) { + count = + nilfs_cpfile_block_sub_valid_checkpoints( + cpfile, cp_bh, kaddr, nicps); + if (count == 0) { + /* make hole */ + kunmap_atomic(kaddr); + brelse(cp_bh); + ret = + nilfs_cpfile_delete_checkpoint_block( + cpfile, cno); + if (ret == 0) + continue; + printk(KERN_ERR + "%s: cannot delete block\n", + __func__); + break; + } } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(cp_bh); } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } - out_header: brelse(header_bh); + if (nss > 0) + ret = -EBUSY; out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); @@ -384,9 +388,10 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, } static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; @@ -407,20 +412,25 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, continue; /* skip hole */ } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { - if (!nilfs_checkpoint_invalid(cp)) - nilfs_cpfile_checkpoint_to_cpinfo( - cpfile, cp, &ci[n++]); + if (!nilfs_checkpoint_invalid(cp)) { + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, + ci); + ci = (void *)ci + cisz; + n++; + } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); } ret = n; - if (n > 0) - *cnop = ci[n - 1].ci_cno + 1; + if (n > 0) { + ci = (void *)ci - cisz; + *cnop = ci->ci_cno + 1; + } out: up_read(&NILFS_MDT(cpfile)->mi_sem); @@ -428,11 +438,12 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct buffer_head *bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; void *kaddr; @@ -444,10 +455,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); if (curr == 0) { ret = 0; @@ -465,21 +476,23 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); while (n < nci) { cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) break; - nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]); + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci); + ci = (void *)ci + cisz; + n++; next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); if (next == 0) break; /* reach end of the snapshot list */ next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -487,12 +500,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); } curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); *cnop = curr; ret = n; @@ -511,13 +524,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { switch (mode) { case NILFS_CHECKPOINT: - return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz, nci); case NILFS_SNAPSHOT: - return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz, nci); default: return -EINVAL; } @@ -533,20 +546,14 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) struct nilfs_cpinfo ci; __u64 tcno = cno; ssize_t nci; - int ret; - nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); + nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, sizeof(ci), 1); if (nci < 0) return nci; else if (nci == 0 || ci.ci_cno != cno) return -ENOENT; - - /* cannot delete the latest checkpoint nor snapshots */ - ret = nilfs_cpinfo_snapshot(&ci); - if (ret < 0) - return ret; - else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; + else if (nilfs_cpinfo_snapshot(&ci)) + return -EBUSY; return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } @@ -589,24 +596,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) goto out_cp; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); list = &header->ch_snapshot_list; curr_bh = header_bh; @@ -618,13 +625,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); } curr_blkoff = prev_blkoff; cp = nilfs_cpfile_block_get_checkpoint( @@ -632,7 +639,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, @@ -644,34 +651,34 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, curr, curr_bh, kaddr); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(curr_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(curr_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); @@ -707,23 +714,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) @@ -747,34 +754,34 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(next_bh->b_page, KM_USER0); + kaddr = kmap_atomic(next_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, next, next_bh, kaddr); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(next_bh); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(next_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); @@ -817,17 +824,22 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; - if (cno == 0) - return -ENOENT; /* checkpoint number 0 is invalid */ + /* CP number is invalid if it's zero or larger than the + largest exist one.*/ + if (cno == 0 || cno >= nilfs_mdt_cno(cpfile)) + return -ENOENT; down_read(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); - ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + if (nilfs_checkpoint_invalid(cp)) + ret = -ENOENT; + else + ret = nilfs_checkpoint_snapshot(cp); + kunmap_atomic(kaddr); brelse(bh); out: @@ -855,29 +867,20 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { - struct the_nilfs *nilfs; int ret; - nilfs = NILFS_MDT(cpfile)->mi_nilfs; - switch (mode) { case NILFS_CHECKPOINT: - /* - * Check for protecting existing snapshot mounts: - * bd_mount_sem is used to make this operation atomic and - * exclusive with a new mount job. Though it doesn't cover - * umount, it's enough for the purpose. - */ - down(&nilfs->ns_bdev->bd_mount_sem); - if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { - /* Current implementation does not have to protect - plain read-only mounts since they are exclusive - with a read/write mount and are protected from the - cleaner. */ + if (nilfs_checkpoint_is_mounted(cpfile->i_sb, cno)) + /* + * Current implementation does not have to protect + * plain read-only mounts since they are exclusive + * with a read/write mount and are protected from the + * cleaner. + */ ret = -EBUSY; - } else + else ret = nilfs_cpfile_clear_snapshot(cpfile, cno); - up(&nilfs->ns_bdev->bd_mount_sem); return ret; case NILFS_SNAPSHOT: return nilfs_cpfile_set_snapshot(cpfile, cno); @@ -913,15 +916,66 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); out_sem: up_read(&NILFS_MDT(cpfile)->mi_sem); return ret; } + +/** + * nilfs_cpfile_read - read or get cpfile inode + * @sb: super block instance + * @cpsize: size of a checkpoint entry + * @raw_inode: on-disk cpfile inode + * @inodep: buffer to store the inode + */ +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep) +{ + struct inode *cpfile; + int err; + + if (cpsize > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large checkpoint size: %zu bytes.\n", + cpsize); + return -EINVAL; + } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { + printk(KERN_ERR + "NILFS: too small checkpoint size: %zu bytes.\n", + cpsize); + return -EINVAL; + } + + cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); + if (unlikely(!cpfile)) + return -ENOMEM; + if (!(cpfile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); + if (err) + goto failed; + + nilfs_mdt_set_entry_size(cpfile, cpsize, + sizeof(struct nilfs_cpfile_header)); + + err = nilfs_read_inode_common(cpfile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(cpfile); + out: + *inodep = cpfile; + return 0; + failed: + iget_failed(cpfile); + return err; +} diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 1a8a1008c34..a242b9a314f 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -27,8 +27,6 @@ #include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> -#define NILFS_CPFILE_GFP NILFS_MDT_GFP - int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, struct nilfs_checkpoint **, @@ -39,7 +37,10 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); int nilfs_cpfile_is_snapshot(struct inode *, __u64); int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); -ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, - struct nilfs_cpinfo *, size_t); +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, + size_t); + +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index bb8a5818e7f..0d5fada9119 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,23 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +/** + * struct nilfs_dat_info - on-memory private data of DAT file + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of DAT file + * @shadow: shadow map of DAT file + */ +struct nilfs_dat_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; + struct nilfs_shadow_map shadow; +}; + +static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) +{ + return (struct nilfs_dat_info *)NILFS_MDT(dat); +} + static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { @@ -43,7 +60,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat, static void nilfs_dat_commit_entry(struct inode *dat, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); + mark_buffer_dirty(req->pr_entry_bh); nilfs_mdt_mark_dirty(dat); brelse(req->pr_entry_bh); } @@ -74,13 +91,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -92,44 +109,24 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) nilfs_palloc_abort_alloc_entry(dat, req); } -int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req) -{ - int ret; - - ret = nilfs_palloc_prepare_free_entry(dat, req); - if (ret < 0) - return ret; - ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - nilfs_palloc_abort_free_entry(dat, req); - return ret; - } - return 0; -} - -void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) +static void nilfs_dat_commit_free(struct inode *dat, + struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); nilfs_palloc_commit_free_entry(dat, req); } -void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req) -{ - nilfs_dat_abort_entry(dat, req); - nilfs_palloc_abort_free_entry(dat, req); -} - int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { int ret; @@ -145,30 +142,16 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); - if (entry->de_blocknr != cpu_to_le64(0) || - entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) { - printk(KERN_CRIT - "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n", - __func__, (unsigned long long)req->pr_entry_nr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end), - (unsigned long long)le64_to_cpu(entry->de_blocknr)); - } entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); } -void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req) -{ - nilfs_dat_abort_entry(dat, req); -} - int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; @@ -183,12 +166,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -209,7 +192,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); end = start = le64_to_cpu(entry->de_start); @@ -219,7 +202,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -234,18 +217,49 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); nilfs_dat_abort_entry(dat, req); } +int nilfs_dat_prepare_update(struct inode *dat, + struct nilfs_palloc_req *oldreq, + struct nilfs_palloc_req *newreq) +{ + int ret; + + ret = nilfs_dat_prepare_end(dat, oldreq); + if (!ret) { + ret = nilfs_dat_prepare_alloc(dat, newreq); + if (ret < 0) + nilfs_dat_abort_end(dat, oldreq); + } + return ret; +} + +void nilfs_dat_commit_update(struct inode *dat, + struct nilfs_palloc_req *oldreq, + struct nilfs_palloc_req *newreq, int dead) +{ + nilfs_dat_commit_end(dat, oldreq, dead); + nilfs_dat_commit_alloc(dat, newreq); +} + +void nilfs_dat_abort_update(struct inode *dat, + struct nilfs_palloc_req *oldreq, + struct nilfs_palloc_req *newreq) +{ + nilfs_dat_abort_end(dat, oldreq); + nilfs_dat_abort_alloc(dat, newreq); +} + /** * nilfs_dat_mark_dirty - * @dat: DAT file inode @@ -282,7 +296,7 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) * @vblocknrs and @nitems. * * Return Value: On success, 0 is returned. On error, one of the following - * nagative error codes is returned. + * negative error codes is returned. * * %-EIO - I/O error. * @@ -321,22 +335,39 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + + /* + * The given disk block number (blocknr) is not yet written to + * the device at this point. + * + * To prevent nilfs_dat_translate() from returning the + * uncommitted block number, this makes a copy of the entry + * buffer and redirects nilfs_dat_translate() to the copy. + */ + if (!buffer_nilfs_redirected(entry_bh)) { + ret = nilfs_mdt_freeze_buffer(dat, entry_bh); + if (ret) { + brelse(entry_bh); + return ret; + } + } + + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(entry_bh); + mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); brelse(entry_bh); @@ -365,7 +396,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) */ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { - struct buffer_head *entry_bh; + struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; void *kaddr; @@ -375,56 +406,124 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { + bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); + if (bh) { + WARN_ON(!buffer_uptodate(bh)); + brelse(entry_bh); + entry_bh = bh; + } + } + + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { ret = -ENOENT; goto out; } - if (blocknrp != NULL) - *blocknrp = blocknr; + *blocknrp = blocknr; out: - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return ret; } -ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo, +ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, size_t nvi) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; + struct nilfs_vinfo *vinfo = buf; __u64 first, last; void *kaddr; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; int i, j, n, ret; for (i = 0; i < nvi; i += n) { - ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr, + ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); /* last virtual block number in this block */ - first = vinfo[i].vi_vblocknr; + first = vinfo->vi_vblocknr; do_div(first, entries_per_block); first *= entries_per_block; last = first + entries_per_block - 1; for (j = i, n = 0; - j < nvi && vinfo[j].vi_vblocknr >= first && - vinfo[j].vi_vblocknr <= last; - j++, n++) { + j < nvi && vinfo->vi_vblocknr >= first && + vinfo->vi_vblocknr <= last; + j++, n++, vinfo = (void *)vinfo + visz) { entry = nilfs_palloc_block_get_entry( - dat, vinfo[j].vi_vblocknr, entry_bh, kaddr); - vinfo[j].vi_start = le64_to_cpu(entry->de_start); - vinfo[j].vi_end = le64_to_cpu(entry->de_end); - vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr); + dat, vinfo->vi_vblocknr, entry_bh, kaddr); + vinfo->vi_start = le64_to_cpu(entry->de_start); + vinfo->vi_end = le64_to_cpu(entry->de_end); + vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); } return nvi; } + +/** + * nilfs_dat_read - read or get dat inode + * @sb: super block instance + * @entry_size: size of a dat entry + * @raw_inode: on-disk dat inode + * @inodep: buffer to store the inode + */ +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep) +{ + static struct lock_class_key dat_lock_key; + struct inode *dat; + struct nilfs_dat_info *di; + int err; + + if (entry_size > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large DAT entry size: %zu bytes.\n", + entry_size); + return -EINVAL; + } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { + printk(KERN_ERR + "NILFS: too small DAT entry size: %zu bytes.\n", + entry_size); + return -EINVAL; + } + + dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); + if (unlikely(!dat)) + return -ENOMEM; + if (!(dat->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(dat, entry_size); + if (err) + goto failed; + + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); + nilfs_mdt_setup_shadow_map(dat, &di->shadow); + + err = nilfs_read_inode_common(dat, raw_inode); + if (err) + goto failed; + + unlock_new_inode(dat); + out: + *inodep = dat; + return 0; + failed: + iget_failed(dat); + return err; +} diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d9560654a4b..cbd8e973250 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -27,7 +27,6 @@ #include <linux/buffer_head.h> #include <linux/fs.h> -#define NILFS_DAT_GFP NILFS_MDT_GFP struct nilfs_palloc_req; @@ -39,14 +38,22 @@ void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *); int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, sector_t); -void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *); int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); +int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *, + struct nilfs_palloc_req *); +void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *, + struct nilfs_palloc_req *, int); +void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *, + struct nilfs_palloc_req *); int nilfs_dat_mark_dirty(struct inode *, __u64); int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); -ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); +ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); + +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 54100acc110..197a63e9d10 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -43,7 +43,6 @@ */ #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include "nilfs.h" #include "page.h" @@ -81,31 +80,17 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int nilfs_prepare_chunk_uninterruptible(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) { loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, - AOP_FLAG_UNINTERRUPTIBLE, &page, - NULL, nilfs_get_block); + return __block_write_begin(page, pos, to - from, nilfs_get_block); } -static int nilfs_prepare_chunk(struct page *page, +static void nilfs_commit_chunk(struct page *page, struct address_space *mapping, unsigned from, unsigned to) { - loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, 0, &page, - NULL, nilfs_get_block); -} - -static int nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) -{ struct inode *dir = mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); loff_t pos = page_offset(page) + from; unsigned len = to - from; unsigned nr_dirty, copied; @@ -113,15 +98,13 @@ static int nilfs_commit_chunk(struct page *page, nr_dirty = nilfs_page_count_clean_buffers(page, from, to); copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); - if (pos + copied > dir->i_size) { + if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); - mark_inode_dirty(dir); - } if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + err = nilfs_set_file_dirty(dir, nr_dirty); + WARN_ON(err); /* do not happen */ unlock_page(page); - return err; } static void nilfs_check_page(struct page *page) @@ -144,7 +127,7 @@ static void nilfs_check_page(struct page *page) } for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { p = (struct nilfs_dir_entry *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); + rec_len = nilfs_rec_len_from_disk(p->rec_len); if (rec_len < NILFS_DIR_REC_LEN(1)) goto Eshort; @@ -202,13 +185,10 @@ fail: static struct page *nilfs_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t *)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, n, NULL); + if (!IS_ERR(page)) { - wait_on_page_locked(page); kmap(page); - if (!PageUptodate(page)) - goto fail; if (!PageChecked(page)) nilfs_check_page(page); if (PageError(page)) @@ -227,7 +207,7 @@ fail: * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller. */ static int -nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) +nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de) { if (len != de->name_len) return 0; @@ -241,7 +221,8 @@ nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) */ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) { - return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); + return (struct nilfs_dir_entry *)((char *)p + + nilfs_rec_len_from_disk(p->rec_len)); } static unsigned char @@ -270,27 +251,23 @@ nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } -static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int nilfs_readdir(struct file *file, struct dir_context *ctx) { - loff_t pos = filp->f_pos; - struct inode *inode = filp->f_dentry->d_inode; + loff_t pos = ctx->pos; + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ - unsigned char *types = NULL; - int ret; if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) - goto success; - - types = nilfs_filetype_table; + return 0; for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; @@ -300,9 +277,8 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (IS_ERR(page)) { nilfs_error(sb, __func__, "bad page in #%lu", inode->i_ino); - filp->f_pos += PAGE_CACHE_SIZE - offset; - ret = -EIO; - goto done; + ctx->pos += PAGE_CACHE_SIZE - offset; + return -EIO; } kaddr = page_address(page); de = (struct nilfs_dir_entry *)(kaddr + offset); @@ -312,35 +288,28 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (de->rec_len == 0) { nilfs_error(sb, __func__, "zero-length directory entry"); - ret = -EIO; nilfs_put_page(page); - goto done; + return -EIO; } if (de->inode) { - int over; - unsigned char d_type = DT_UNKNOWN; + unsigned char t; - if (types && de->file_type < NILFS_FT_MAX) - d_type = types[de->file_type]; + if (de->file_type < NILFS_FT_MAX) + t = nilfs_filetype_table[de->file_type]; + else + t = DT_UNKNOWN; - offset = (char *)de - kaddr; - over = filldir(dirent, de->name, de->name_len, - (n<<PAGE_CACHE_SHIFT) | offset, - le64_to_cpu(de->inode), d_type); - if (over) { + if (!dir_emit(ctx, de->name, de->name_len, + le64_to_cpu(de->inode), t)) { nilfs_put_page(page); - goto success; + return 0; } } - filp->f_pos += le16_to_cpu(de->rec_len); + ctx->pos += nilfs_rec_len_from_disk(de->rec_len); } nilfs_put_page(page); } - -success: - ret = 0; -done: - return ret; + return 0; } /* @@ -352,11 +321,11 @@ done: * Entry is guaranteed to be valid. */ struct nilfs_dir_entry * -nilfs_find_entry(struct inode *dir, struct dentry *dentry, +nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct page **res_page) { - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + const unsigned char *name = qstr->name; + int namelen = qstr->len; unsigned reclen = NILFS_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); @@ -399,7 +368,7 @@ nilfs_find_entry(struct inode *dir, struct dentry *dentry, /* next page is past the blocks we've got */ if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { nilfs_error(dir->i_sb, __func__, - "dir %lu size %lld exceeds block cout %llu", + "dir %lu size %lld exceeds block count %llu", dir->i_ino, dir->i_size, (unsigned long long)dir->i_blocks); goto out; @@ -427,13 +396,13 @@ struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) return de; } -ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry) +ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) { ino_t res = 0; struct nilfs_dir_entry *de; struct page *page; - de = nilfs_find_entry(dir, dentry, &page); + de = nilfs_find_entry(dir, qstr, &page); if (de) { res = le64_to_cpu(de->inode); kunmap(page); @@ -447,20 +416,18 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, struct page *page, struct inode *inode) { unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); + unsigned to = from + nilfs_rec_len_from_disk(de->rec_len); struct address_space *mapping = page->mapping; int err; lock_page(page); - err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); nilfs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; -/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); } /* @@ -469,7 +436,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, int nilfs_add_link(struct dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; + const unsigned char *name = dentry->d_name.name; int namelen = dentry->d_name.len; unsigned chunk_size = nilfs_chunk_size(dir); unsigned reclen = NILFS_DIR_REC_LEN(namelen); @@ -504,7 +471,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) /* We hit i_size */ name_len = 0; rec_len = chunk_size; - de->rec_len = cpu_to_le16(chunk_size); + de->rec_len = nilfs_rec_len_to_disk(chunk_size); de->inode = 0; goto got_it; } @@ -518,7 +485,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) if (nilfs_match(namelen, name, de)) goto out_unlock; name_len = NILFS_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); + rec_len = nilfs_rec_len_from_disk(de->rec_len); if (!de->inode && rec_len >= reclen) goto got_it; if (rec_len >= name_len + reclen) @@ -534,25 +501,24 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) got_it: from = (char *)de - (char *)page_address(page); to = from + rec_len; - err = nilfs_prepare_chunk(page, page->mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); if (err) goto out_unlock; if (de->inode) { struct nilfs_dir_entry *de1; de1 = (struct nilfs_dir_entry *)((char *)de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); + de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len); + de->rec_len = nilfs_rec_len_to_disk(name_len); de = de1; } de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, page->mapping, from, to); + nilfs_commit_chunk(page, page->mapping, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME; -/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: nilfs_put_page(page); @@ -573,7 +539,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) struct inode *inode = mapping->host; char *kaddr = page_address(page); unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); - unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); + unsigned to = ((char *)dir - kaddr) + + nilfs_rec_len_from_disk(dir->rec_len); struct nilfs_dir_entry *pde = NULL; struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); int err; @@ -591,15 +558,13 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) if (pde) from = (char *)pde - (char *)page_address(page); lock_page(page); - err = nilfs_prepare_chunk(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to - from); + pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; -/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(inode); out: nilfs_put_page(page); return err; @@ -620,28 +585,28 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); + err = nilfs_prepare_chunk(page, 0, chunk_size); if (unlikely(err)) { unlock_page(page); goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; - de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); + de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); + de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr, KM_USER0); - err = nilfs_commit_chunk(page, mapping, 0, chunk_size); + kunmap_atomic(kaddr); + nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: page_cache_release(page); return err; @@ -698,13 +663,13 @@ not_empty: return 0; } -struct file_operations nilfs_dir_operations = { +const struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = nilfs_readdir, + .iterate = nilfs_readdir, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_ioctl, + .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ .fsync = nilfs_sync_file, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index c6379e48278..82f4865e86d 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -25,182 +25,158 @@ #include "page.h" #include "direct.h" #include "alloc.h" +#include "dat.h" -static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) +static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct) { return (__le64 *) - ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); + ((struct nilfs_direct_node *)direct->b_u.u_data + 1); } static inline __u64 -nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) +nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key) { - return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); + return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key)); } -static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, +static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct, __u64 key, __u64 ptr) { - *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); + *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr); } -static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, +static int nilfs_direct_lookup(const struct nilfs_bmap *direct, __u64 key, int level, __u64 *ptrp) { - struct nilfs_direct *direct; __u64 ptr; - direct = (struct nilfs_direct *)bmap; - if ((key > NILFS_DIRECT_KEY_MAX) || - (level != 1) || /* XXX: use macro for level 1 */ - ((ptr = nilfs_direct_get_ptr(direct, key)) == - NILFS_BMAP_INVALID_PTR)) + if (key > NILFS_DIRECT_KEY_MAX || level != 1) + return -ENOENT; + ptr = nilfs_direct_get_ptr(direct, key); + if (ptr == NILFS_BMAP_INVALID_PTR) return -ENOENT; - if (ptrp != NULL) - *ptrp = ptr; + *ptrp = ptr; return 0; } +static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, + __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int ret, cnt; + + if (key > NILFS_DIRECT_KEY_MAX) + return -ENOENT; + ptr = nilfs_direct_get_ptr(direct, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -ENOENT; + + if (NILFS_BMAP_USE_VBN(direct)) { + dat = nilfs_bmap_get_dat(direct); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + return ret; + ptr = blocknr; + } + + maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1); + for (cnt = 1; cnt < maxblocks && + (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) != + NILFS_BMAP_INVALID_PTR; + cnt++) { + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + return ret; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt) + break; + } + *ptrp = ptr; + return cnt; +} + static __u64 -nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) +nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key) { __u64 ptr; - ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); + ptr = nilfs_bmap_find_target_seq(direct, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; else /* block group */ - return nilfs_bmap_find_target_in_group(&direct->d_bmap); -} - -static void nilfs_direct_set_target_v(struct nilfs_direct *direct, - __u64 key, __u64 ptr) -{ - direct->d_bmap.b_last_allocated_key = key; - direct->d_bmap.b_last_allocated_ptr = ptr; -} - -static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, - __u64 key, - union nilfs_bmap_ptr_req *req, - struct nilfs_bmap_stats *stats) -{ - int ret; - - if (direct->d_ops->dop_find_target != NULL) - req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, - req); - if (ret < 0) - return ret; - - stats->bs_nblocks = 1; - return 0; -} - -static void nilfs_direct_commit_insert(struct nilfs_direct *direct, - union nilfs_bmap_ptr_req *req, - __u64 key, __u64 ptr) -{ - struct buffer_head *bh; - - /* ptr must be a pointer to a buffer head. */ - bh = (struct buffer_head *)((unsigned long)ptr); - set_buffer_nilfs_volatile(bh); - - if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_alloc_ptr( - &direct->d_bmap, req); - nilfs_direct_set_ptr(direct, key, req->bpr_ptr); - - if (!nilfs_bmap_dirty(&direct->d_bmap)) - nilfs_bmap_set_dirty(&direct->d_bmap); - - if (direct->d_ops->dop_set_target != NULL) - direct->d_ops->dop_set_target(direct, key, req->bpr_ptr); + return nilfs_bmap_find_target_in_group(direct); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { - struct nilfs_direct *direct; union nilfs_bmap_ptr_req req; - struct nilfs_bmap_stats stats; + struct inode *dat = NULL; + struct buffer_head *bh; int ret; - direct = (struct nilfs_direct *)bmap; if (key > NILFS_DIRECT_KEY_MAX) return -ENOENT; - if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) + if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR) return -EEXIST; - ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); - if (ret < 0) - return ret; - nilfs_direct_commit_insert(direct, &req, key, ptr); - nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); - - return 0; -} + if (NILFS_BMAP_USE_VBN(bmap)) { + req.bpr_ptr = nilfs_direct_find_target_v(bmap, key); + dat = nilfs_bmap_get_dat(bmap); + } + ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); + if (!ret) { + /* ptr must be a pointer to a buffer head. */ + bh = (struct buffer_head *)((unsigned long)ptr); + set_buffer_nilfs_volatile(bh); -static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, - union nilfs_bmap_ptr_req *req, - __u64 key, - struct nilfs_bmap_stats *stats) -{ - int ret; + nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); + nilfs_direct_set_ptr(bmap, key, req.bpr_ptr); - if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - req->bpr_ptr = nilfs_direct_get_ptr(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr( - &direct->d_bmap, req); - if (ret < 0) - return ret; - } + if (!nilfs_bmap_dirty(bmap)) + nilfs_bmap_set_dirty(bmap); - stats->bs_nblocks = 1; - return 0; -} + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); -static void nilfs_direct_commit_delete(struct nilfs_direct *direct, - union nilfs_bmap_ptr_req *req, - __u64 key) -{ - if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_end_ptr( - &direct->d_bmap, req); - nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); + nilfs_inode_add_blocks(bmap->b_inode, 1); + } + return ret; } static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) { - struct nilfs_direct *direct; union nilfs_bmap_ptr_req req; - struct nilfs_bmap_stats stats; + struct inode *dat; int ret; - direct = (struct nilfs_direct *)bmap; - if ((key > NILFS_DIRECT_KEY_MAX) || - nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) + if (key > NILFS_DIRECT_KEY_MAX || + nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR) return -ENOENT; - ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); - if (ret < 0) - return ret; - nilfs_direct_commit_delete(direct, &req, key); - nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); + dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; + req.bpr_ptr = nilfs_direct_get_ptr(bmap, key); - return 0; + ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); + if (!ret) { + nilfs_bmap_commit_end_ptr(bmap, &req, dat); + nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); + nilfs_inode_sub_blocks(bmap->b_inode, 1); + } + return ret; } -static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) { - struct nilfs_direct *direct; __u64 key, lastkey; - direct = (struct nilfs_direct *)bmap; lastkey = NILFS_DIRECT_KEY_MAX + 1; for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) if (nilfs_direct_get_ptr(direct, key) != @@ -220,15 +196,13 @@ static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) return key > NILFS_DIRECT_KEY_MAX; } -static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, +static int nilfs_direct_gather_data(struct nilfs_bmap *direct, __u64 *keys, __u64 *ptrs, int nitems) { - struct nilfs_direct *direct; __u64 key; __u64 ptr; int n; - direct = (struct nilfs_direct *)bmap; if (nitems > NILFS_DIRECT_NBLOCKS) nitems = NILFS_DIRECT_NBLOCKS; n = 0; @@ -244,10 +218,8 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, } int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, - __u64 key, __u64 *keys, __u64 *ptrs, - int n, __u64 low, __u64 high) + __u64 key, __u64 *keys, __u64 *ptrs, int n) { - struct nilfs_direct *direct; __le64 *dptrs; int ret, i, j; @@ -263,84 +235,73 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, bmap->b_ops->bop_clear(bmap); /* convert */ - direct = (struct nilfs_direct *)bmap; - dptrs = nilfs_direct_dptrs(direct); + dptrs = nilfs_direct_dptrs(bmap); for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { if ((j < n) && (i == keys[j])) { dptrs[i] = (i != key) ? - nilfs_bmap_ptr_to_dptr(ptrs[j]) : + cpu_to_le64(ptrs[j]) : NILFS_BMAP_INVALID_PTR; j++; } else dptrs[i] = NILFS_BMAP_INVALID_PTR; } - nilfs_direct_init(bmap, low, high); - + nilfs_direct_init(bmap); return 0; } -static int nilfs_direct_propagate_v(struct nilfs_direct *direct, - struct buffer_head *bh) +static int nilfs_direct_propagate(struct nilfs_bmap *bmap, + struct buffer_head *bh) { - union nilfs_bmap_ptr_req oldreq, newreq; + struct nilfs_palloc_req oldreq, newreq; + struct inode *dat; __u64 key; __u64 ptr; int ret; - key = nilfs_bmap_data_get_key(&direct->d_bmap, bh); - ptr = nilfs_direct_get_ptr(direct, key); + if (!NILFS_BMAP_USE_VBN(bmap)) + return 0; + + dat = nilfs_bmap_get_dat(bmap); + key = nilfs_bmap_data_get_key(bmap, bh); + ptr = nilfs_direct_get_ptr(bmap, key); if (!buffer_nilfs_volatile(bh)) { - oldreq.bpr_ptr = ptr; - newreq.bpr_ptr = ptr; - ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq, - &newreq); + oldreq.pr_entry_nr = ptr; + newreq.pr_entry_nr = ptr; + ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq); if (ret < 0) return ret; - nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq); + nilfs_dat_commit_update(dat, &oldreq, &newreq, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); set_buffer_nilfs_volatile(bh); - nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); + nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr); } else - ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr); + ret = nilfs_dat_mark_dirty(dat, ptr); return ret; } -static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - struct nilfs_direct *direct; - - direct = (struct nilfs_direct *)bmap; - return (direct->d_ops->dop_propagate != NULL) ? - direct->d_ops->dop_propagate(direct, bh) : - 0; -} - -static int nilfs_direct_assign_v(struct nilfs_direct *direct, +static int nilfs_direct_assign_v(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { + struct inode *dat = nilfs_bmap_get_dat(direct); union nilfs_bmap_ptr_req req; int ret; req.bpr_ptr = ptr; - ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr( - &direct->d_bmap, &req); - if (ret < 0) - return ret; - direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap, - &req, blocknr); - - binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); - - return 0; + ret = nilfs_dat_prepare_start(dat, &req.bpr_req); + if (!ret) { + nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); + binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); + } + return ret; } -static int nilfs_direct_assign_p(struct nilfs_direct *direct, +static int nilfs_direct_assign_p(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, @@ -348,7 +309,7 @@ static int nilfs_direct_assign_p(struct nilfs_direct *direct, { nilfs_direct_set_ptr(direct, key, blocknr); - binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; return 0; @@ -359,30 +320,30 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, sector_t blocknr, union nilfs_binfo *binfo) { - struct nilfs_direct *direct; __u64 key; __u64 ptr; - direct = (struct nilfs_direct *)bmap; key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, (unsigned long long)key); return -EINVAL; } - ptr = nilfs_direct_get_ptr(direct, key); + ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, (unsigned long long)ptr); return -EINVAL; } - return direct->d_ops->dop_assign(direct, key, ptr, bh, - blocknr, binfo); + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) : + nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { .bop_lookup = nilfs_direct_lookup, + .bop_lookup_contig = nilfs_direct_lookup_contig, .bop_insert = nilfs_direct_insert, .bop_delete = nilfs_direct_delete, .bop_clear = NULL, @@ -401,36 +362,8 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = { }; -static const struct nilfs_direct_operations nilfs_direct_ops_v = { - .dop_find_target = nilfs_direct_find_target_v, - .dop_set_target = nilfs_direct_set_target_v, - .dop_propagate = nilfs_direct_propagate_v, - .dop_assign = nilfs_direct_assign_v, -}; - -static const struct nilfs_direct_operations nilfs_direct_ops_p = { - .dop_find_target = NULL, - .dop_set_target = NULL, - .dop_propagate = NULL, - .dop_assign = nilfs_direct_assign_p, -}; - -int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_direct_init(struct nilfs_bmap *bmap) { - struct nilfs_direct *direct; - - direct = (struct nilfs_direct *)bmap; bmap->b_ops = &nilfs_direct_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - direct->d_ops = &nilfs_direct_ops_p; - break; - default: - direct->d_ops = &nilfs_direct_ops_v; - break; - } - return 0; } diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index 45d2c5cda81..dc643de20a2 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -28,20 +28,6 @@ #include "bmap.h" -struct nilfs_direct; - -/** - * struct nilfs_direct_operations - direct mapping operation table - */ -struct nilfs_direct_operations { - __u64 (*dop_find_target)(const struct nilfs_direct *, __u64); - void (*dop_set_target)(struct nilfs_direct *, __u64, __u64); - int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *); - int (*dop_assign)(struct nilfs_direct *, __u64, __u64, - struct buffer_head **, sector_t, - union nilfs_binfo *); -}; - /** * struct nilfs_direct_node - direct node * @dn_flags: flags @@ -52,27 +38,14 @@ struct nilfs_direct_node { __u8 pad[7]; }; -/** - * struct nilfs_direct - direct mapping - * @d_bmap: bmap structure - * @d_ops: direct mapping operation table - */ -struct nilfs_direct { - struct nilfs_bmap d_bmap; - - /* direct-mapping-specific members */ - const struct nilfs_direct_operations *d_ops; -}; - - #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) #define NILFS_DIRECT_KEY_MIN 0 #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) -int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_direct_init(struct nilfs_bmap *); int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, - __u64 *, int, __u64, __u64); + __u64 *, int); #endif /* _NILFS_DIRECT_H */ diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h new file mode 100644 index 00000000000..19ccbf9522a --- /dev/null +++ b/fs/nilfs2/export.h @@ -0,0 +1,25 @@ +#ifndef NILFS_EXPORT_H +#define NILFS_EXPORT_H + +#include <linux/exportfs.h> + +extern const struct export_operations nilfs_export_ops; + +/** + * struct nilfs_fid - NILFS file id type + * @cno: checkpoint number + * @ino: inode number + * @gen: file generation (version) for NFS + * @parent_gen: parent generation (version) for NFS + * @parent_ino: parent inode number + */ +struct nilfs_fid { + u64 cno; + u64 ino; + u32 gen; + + u32 parent_gen; + u64 parent_ino; +} __attribute__ ((packed)); + +#endif diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 6bd84a0d823..24978153c0c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -27,7 +27,7 @@ #include "nilfs.h" #include "segment.h" -int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { /* * Called from fsync() system call @@ -37,45 +37,58 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) * This function should be implemented when the writeback function * will be implemented. */ - struct inode *inode = dentry->d_inode; + struct the_nilfs *nilfs; + struct inode *inode = file->f_mapping->host; int err; - if (!nilfs_inode_dirty(inode)) - return 0; - - if (datasync) - err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, - LLONG_MAX); - else - err = nilfs_construct_segment(inode->i_sb); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + if (nilfs_inode_dirty(inode)) { + if (datasync) + err = nilfs_construct_dsync_segment(inode->i_sb, inode, + 0, LLONG_MAX); + else + err = nilfs_construct_segment(inode->i_sb); + } + mutex_unlock(&inode->i_mutex); + nilfs = inode->i_sb->s_fs_info; + if (!err && nilfs_test_opt(nilfs, BARRIER)) { + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (err != -EIO) + err = 0; + } return err; } static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; - int ret; + int ret = 0; - if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) + if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ + sb_start_pagefault(inode->i_sb); lock_page(page); if (page->mapping != inode->i_mapping || page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); - return VM_FAULT_NOPAGE; /* make the VM retry the fault */ + ret = -EFAULT; /* make the VM retry the fault */ + goto out; } /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) { - unlock_page(page); + if (PageMappedToDisk(page)) goto mapped; - } + if (page_has_buffers(page)) { struct buffer_head *bh, *head; int fully_mapped = 1; @@ -90,7 +103,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (fully_mapped) { SetPageMappedToDisk(page); - unlock_page(page); goto mapped; } } @@ -102,31 +114,35 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); /* never returns -ENOMEM, but may return -ENOSPC */ if (unlikely(ret)) - return VM_FAULT_SIGBUS; + goto out; - ret = block_page_mkwrite(vma, vmf, nilfs_get_block); - if (unlikely(ret)) { + file_update_time(vma->vm_file); + ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + if (ret) { nilfs_transaction_abort(inode->i_sb); - return ret; + goto out; } + nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); nilfs_transaction_commit(inode->i_sb); mapped: - SetPageChecked(page); - wait_on_page_writeback(page); - return 0; + wait_for_stable_page(page); + out: + sb_end_pagefault(inode->i_sb); + return block_page_mkwrite_return(ret); } -struct vm_operations_struct nilfs_file_vm_ops = { +static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nilfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &nilfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } @@ -134,15 +150,15 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) * We have mostly NULL's here: the current defaults are ok for * the nilfs filesystem. */ -struct file_operations nilfs_file_operations = { +const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_ioctl, + .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ .mmap = nilfs_file_mmap, .open = generic_file_open, @@ -151,10 +167,10 @@ struct file_operations nilfs_file_operations = { .splice_read = generic_file_splice_read, }; -struct inode_operations nilfs_file_inode_operations = { - .truncate = nilfs_truncate, +const struct inode_operations nilfs_file_inode_operations = { .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; /* end of file */ diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c deleted file mode 100644 index 93383c5cee9..00000000000 --- a/fs/nilfs2/gcdat.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * gcdat.c - NILFS shadow DAT inode for GC - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, - * and Ryusuke Konishi <ryusuke@osrg.net>. - * - */ - -#include <linux/buffer_head.h> -#include "nilfs.h" -#include "page.h" -#include "mdt.h" - -int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - int err; - - gcdat->i_state = 0; - gcdat->i_blocks = dat->i_blocks; - gii->i_flags = dii->i_flags; - gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT); - gii->i_cno = 0; - nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap); - err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping); - if (unlikely(err)) - return err; - - return nilfs_copy_dirty_pages(&gii->i_btnode_cache, - &dii->i_btnode_cache); -} - -void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - struct address_space *mapping = dat->i_mapping; - struct address_space *gmapping = gcdat->i_mapping; - - down_write(&NILFS_MDT(dat)->mi_sem); - dat->i_blocks = gcdat->i_blocks; - dii->i_flags = gii->i_flags; - dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT); - - nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); - - nilfs_clear_dirty_pages(mapping); - nilfs_copy_back_pages(mapping, gmapping); - /* note: mdt dirty flags should be cleared by segctor. */ - - nilfs_clear_dirty_pages(&dii->i_btnode_cache); - nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache); - - up_write(&NILFS_MDT(dat)->mi_sem); -} - -void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *gii = NILFS_I(gcdat); - - gcdat->i_state = I_CLEAR; - gii->i_flags = 0; - - truncate_inode_pages(gcdat->i_mapping, 0); - truncate_inode_pages(&gii->i_btnode_cache, 0); -} diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 19d2102b6a6..57ceaf33d17 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -28,13 +28,6 @@ * gcinodes), and this file provides lookup function of the dummy * inodes and their buffer read function. * - * Since NILFS2 keeps up multiple checkpoints/snapshots accross GC, it - * has to treat blocks that belong to a same file but have different - * checkpoint numbers. To avoid interference among generations, dummy - * inodes are managed separatly from actual inodes, and their lookup - * function (nilfs_gc_iget) is designed to be specified with a - * checkpoint number argument as well as an inode number. - * * Buffers and pages held by the dummy inodes will be released each * time after they are copied to a new log. Dirty blocks made on the * current generation and the blocks to be moved by GC never overlap @@ -45,16 +38,16 @@ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/hash.h> +#include <linux/slab.h> #include <linux/swap.h> #include "nilfs.h" +#include "btree.h" +#include "btnode.h" #include "page.h" #include "mdt.h" #include "dat.h" #include "ifile.h" -static struct address_space_operations def_gcinode_aops = {}; -/* XXX need def_gcinode_iops/fops? */ - /* * nilfs_gccache_submit_read_data() - add data buffer and submit read request * @inode - gc inode @@ -91,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; if (pbn == 0) { - struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; - /* use original dat, not gc dat. */ - err = nilfs_dat_translate(dat_inode, vbn, &pbn); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ brelse(bh); goto failed; @@ -107,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, } if (!buffer_mapped(bh)) { - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; set_buffer_mapped(bh); } bh->b_blocknr = pbn; @@ -147,8 +140,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { - int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, out_bh, 0); + int ret; + + ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + vbn ? : pbn, pbn, READ, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; @@ -162,127 +157,42 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (buffer_dirty(bh)) return -EEXIST; - if (buffer_nilfs_node(bh)) - nilfs_btnode_mark_dirty(bh); - else - nilfs_mdt_mark_buffer_dirty(bh); - return 0; -} - -/* - * nilfs_init_gccache() - allocate and initialize gc_inode hash table - * @nilfs - the_nilfs - * - * Return Value: On success, 0. - * On error, a negative error code is returned. - */ -int nilfs_init_gccache(struct the_nilfs *nilfs) -{ - int loop; - - BUG_ON(nilfs->ns_gc_inodes_h); - - INIT_LIST_HEAD(&nilfs->ns_gc_inodes); - - nilfs->ns_gc_inodes_h = - kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, - GFP_NOFS); - if (nilfs->ns_gc_inodes_h == NULL) - return -ENOMEM; - - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) - INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); + if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + return -EIO; + } + mark_buffer_dirty(bh); return 0; } -/* - * nilfs_destroy_gccache() - free gc_inode hash table - * @nilfs - the nilfs - */ -void nilfs_destroy_gccache(struct the_nilfs *nilfs) +int nilfs_init_gcinode(struct inode *inode) { - if (nilfs->ns_gc_inodes_h) { - nilfs_remove_all_gcinode(nilfs); - kfree(nilfs->ns_gc_inodes_h); - nilfs->ns_gc_inodes_h = NULL; - } -} + struct nilfs_inode_info *ii = NILFS_I(inode); -static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, - __u64 cno) -{ - struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); - struct nilfs_inode_info *ii; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + inode->i_mapping->a_ops = &empty_aops; + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; - if (!inode) - return NULL; - - inode->i_op = NULL; - inode->i_fop = NULL; - inode->i_mapping->a_ops = &def_gcinode_aops; - - ii = NILFS_I(inode); - ii->i_cno = cno; ii->i_flags = 0; - ii->i_state = 1 << NILFS_I_GCINODE; - ii->i_bh = NULL; nilfs_bmap_init_gc(ii->i_bmap); - return inode; -} - -static unsigned long ihash(ino_t ino, __u64 cno) -{ - return hash_long((unsigned long)((ino << 2) + cno), - NILFS_GCINODE_HASH_BITS); -} - -/* - * nilfs_gc_iget() - find or create gc inode with specified (ino,cno) - */ -struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) -{ - struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno); - struct hlist_node *node; - struct inode *inode; - - hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno) - return inode; - } - - inode = alloc_gcinode(nilfs, ino, cno); - if (likely(inode)) { - hlist_add_head(&inode->i_hash, head); - list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - } - return inode; -} - -/* - * nilfs_clear_gcinode() - clear and free a gc inode - */ -void nilfs_clear_gcinode(struct inode *inode) -{ - nilfs_mdt_clear(inode); - nilfs_mdt_destroy(inode); + return 0; } -/* - * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs +/** + * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes */ -void nilfs_remove_all_gcinode(struct the_nilfs *nilfs) +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { - struct hlist_head *head = nilfs->ns_gc_inodes_h; - struct hlist_node *node, *n; - struct inode *inode; - int loop; + struct list_head *head = &nilfs->ns_gc_inodes; + struct nilfs_inode_info *ii; - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) { - hlist_for_each_entry_safe(inode, node, n, head, i_hash) { - hlist_del_init(&inode->i_hash); - list_del_init(&NILFS_I(inode)->i_dirty); - nilfs_clear_gcinode(inode); /* might sleep */ - } + while (!list_empty(head)) { + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); + iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index de86401f209..6548c7851b4 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -30,6 +30,21 @@ #include "ifile.h" /** + * struct nilfs_ifile_info - on-memory private data of ifile + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of ifile + */ +struct nilfs_ifile_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; +}; + +static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) +{ + return (struct nilfs_ifile_info *)NILFS_MDT(ifile); +} + +/** * nilfs_ifile_create_inode - create a new disk inode * @ifile: ifile inode * @out_ino: pointer to a variable to store inode number @@ -69,7 +84,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, return ret; } nilfs_palloc_commit_alloc_entry(ifile, &req); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); nilfs_mdt_mark_dirty(ifile); *out_ino = (ino_t)req.pr_entry_nr; *out_bh = req.pr_entry_bh; @@ -111,13 +126,13 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req.pr_entry_bh->b_page); raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, req.pr_entry_bh, kaddr); raw_inode->i_flags = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); nilfs_palloc_commit_free_entry(ifile, &req); @@ -138,13 +153,75 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); - if (unlikely(err)) { - if (err == -EINVAL) - nilfs_error(sb, __func__, "ifile is broken"); - else - nilfs_warning(sb, __func__, - "unable to read inode: %lu", - (unsigned long) ino); - } + if (unlikely(err)) + nilfs_warning(sb, __func__, "unable to read inode: %lu", + (unsigned long) ino); + return err; +} + +/** + * nilfs_ifile_count_free_inodes - calculate free inodes count + * @ifile: ifile inode + * @nmaxinodes: current maximum of available inodes count [out] + * @nfreeinodes: free inodes count [out] + */ +int nilfs_ifile_count_free_inodes(struct inode *ifile, + u64 *nmaxinodes, u64 *nfreeinodes) +{ + u64 nused; + int err; + + *nmaxinodes = 0; + *nfreeinodes = 0; + + nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); + err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); + if (likely(!err)) + *nfreeinodes = *nmaxinodes - nused; + return err; +} + +/** + * nilfs_ifile_read - read or get ifile inode + * @sb: super block instance + * @root: root object + * @inode_size: size of an inode + * @raw_inode: on-disk ifile inode + * @inodep: buffer to store the inode + */ +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep) +{ + struct inode *ifile; + int err; + + ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); + if (unlikely(!ifile)) + return -ENOMEM; + if (!(ifile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, + sizeof(struct nilfs_ifile_info)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(ifile, inode_size); + if (err) + goto failed; + + nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); + + err = nilfs_read_inode_common(ifile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(ifile); + out: + *inodep = ifile; + return 0; + failed: + iget_failed(ifile); return err; } diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 5d30a35679b..679674d1337 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -31,7 +31,6 @@ #include "mdt.h" #include "alloc.h" -#define NILFS_IFILE_GFP NILFS_MDT_GFP static inline struct nilfs_inode * nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) @@ -50,4 +49,10 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); + +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep); + #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 49ab4a49bb4..6252b173a46 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -22,16 +22,49 @@ */ #include <linux/buffer_head.h> +#include <linux/gfp.h> #include <linux/mpage.h> #include <linux/writeback.h> -#include <linux/uio.h> +#include <linux/aio.h> #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" #include "cpfile.h" #include "ifile.h" +/** + * struct nilfs_iget_args - arguments used during comparison between inodes + * @ino: inode number + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag + */ +struct nilfs_iget_args { + u64 ino; + __u64 cno; + struct nilfs_root *root; + int for_gc; +}; + +void nilfs_inode_add_blocks(struct inode *inode, int n) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + inode_add_bytes(inode, (1 << inode->i_blkbits) * n); + if (root) + atomic64_add(n, &root->blocks_count); +} + +void nilfs_inode_sub_blocks(struct inode *inode, int n) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); + if (root) + atomic64_sub(n, &root->blocks_count); +} /** * nilfs_get_block() - get a file block on the filesystem (callback function) @@ -43,22 +76,23 @@ * * This function does not issue actual read request of the specified data * block. It is done by VFS. - * Bulk read for direct-io is not supported yet. (should be supported) */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); - unsigned long blknum = 0; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 blknum = 0; int err = 0, ret; - struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); + unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; - /* This exclusion control is a workaround; should be revised */ - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (ret == 0) { /* found */ + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); + if (ret > 0) + bh_result->b_size = (ret << inode->i_blkbits); goto out; } /* data block was not found */ @@ -87,18 +121,15 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, inode->i_ino, (unsigned long long)blkoff); err = 0; - } else if (err == -EINVAL) { - nilfs_error(inode->i_sb, __func__, - "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; } nilfs_transaction_abort(inode->i_sb); goto out; } + nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); + set_buffer_delay(bh_result); map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed to proper value */ } else if (ret == -ENOENT) { @@ -144,6 +175,11 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { + nilfs_clear_dirty_pages(mapping, false); + return -EROFS; + } + if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_dsync_segment(inode->i_sb, inode, wbc->range_start, @@ -156,6 +192,18 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; + if (inode->i_sb->s_flags & MS_RDONLY) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); @@ -171,18 +219,46 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { - int ret = __set_page_dirty_buffers(page); + int ret = __set_page_dirty_nobuffers(page); - if (ret) { + if (page_has_buffers(page)) { struct inode *inode = page->mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); - unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); - - nilfs_set_file_dirty(sbi, inode, nr_dirty); + unsigned nr_dirty = 0; + struct buffer_head *bh, *head; + + /* + * This page is locked by callers, and no other thread + * concurrently marks its buffers dirty since they are + * only dirtied through routines in fs/buffer.c in + * which call sites of mark_buffer_dirty are protected + * by page lock. + */ + bh = head = page_buffers(page); + do { + /* Do not mark hole blocks dirty */ + if (buffer_dirty(bh) || !buffer_mapped(bh)) + continue; + + set_buffer_dirty(bh); + nr_dirty++; + } while (bh = bh->b_this_page, bh != head); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } +void nilfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + nilfs_truncate(inode); + } +} + static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -194,11 +270,12 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - *pagep = NULL; - err = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(mapping, pos, len, flags, pagep, + nilfs_get_block); + if (unlikely(err)) { + nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); + } return err; } @@ -215,32 +292,46 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, start + copied); copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); + nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); return err ? : copied; } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t size; if (rw == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, nilfs_get_block, NULL); + size = blockdev_direct_IO(rw, iocb, inode, iter, offset, + nilfs_get_block); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && size < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + count; + + if (end > isize) + nilfs_write_failed(mapping, end); + } + return size; } -struct address_space_operations nilfs_aops = { +const struct address_space_operations nilfs_aops = { .writepage = nilfs_writepage, .readpage = nilfs_readpage, - /* .sync_page = nilfs_sync_page, */ .writepages = nilfs_writepages, .set_page_dirty = nilfs_set_page_dirty, .readpages = nilfs_readpages, @@ -249,14 +340,16 @@ struct address_space_operations nilfs_aops = { /* .releasepage = nilfs_releasepage, */ .invalidatepage = block_invalidatepage, .direct_IO = nilfs_direct_IO, + .is_partially_uptodate = block_is_partially_uptodate, }; -struct inode *nilfs_new_inode(struct inode *dir, int mode) +struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; + struct nilfs_root *root; int err = -ENOMEM; ino_t ino; @@ -267,25 +360,18 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = 1 << NILFS_I_NEW; + ii->i_root = root; - err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&sbi->s_inodes_count); - - inode->i_uid = current_fsuid(); - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current_fsgid(); - - inode->i_mode = mode; + atomic64_inc(&root->inodes_count); + inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -298,24 +384,16 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* No lock is needed; iget() ensures it. */ } - ii->i_flags = NILFS_I(dir)->i_flags; - if (S_ISLNK(mode)) - ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL); - if (!S_ISDIR(mode)) - ii->i_flags &= ~NILFS_DIRSYNC_FL; + ii->i_flags = nilfs_mask_flags( + mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NULL; - ii->i_default_acl = NULL; -#endif - ii->i_cno = 0; nilfs_set_inode_flags(inode); - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); + spin_lock(&nilfs->ns_next_gen_lock); + inode->i_generation = nilfs->ns_next_generation++; + spin_unlock(&nilfs->ns_next_gen_lock); insert_inode_hash(inode); err = nilfs_init_acl(inode, dir); @@ -324,12 +402,11 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) nilfs_init_acl(), proper cancellation of above jobs should be considered */ - mark_inode_dirty(inode); return inode; failed_acl: failed_bmap: - inode->i_nlink = 0; + clear_nlink(inode); iput(inode); /* raw_inode will be deleted through generic_delete_inode() */ goto failed; @@ -342,34 +419,21 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) return ERR_PTR(err); } -void nilfs_free_inode(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - - clear_inode(inode); - /* XXX: check error code? Is there any thing I can do? */ - (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); - atomic_dec(&sbi->s_inodes_count); -} - void nilfs_set_inode_flags(struct inode *inode) { unsigned int flags = NILFS_I(inode)->i_flags; inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC); - if (flags & NILFS_SYNC_FL) + if (flags & FS_SYNC_FL) inode->i_flags |= S_SYNC; - if (flags & NILFS_APPEND_FL) + if (flags & FS_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & NILFS_IMMUTABLE_FL) + if (flags & FS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; -#ifndef NILFS_ATIME_DISABLE - if (flags & NILFS_NOATIME_FL) -#endif + if (flags & FS_NOATIME_FL) inode->i_flags |= S_NOATIME; - if (flags & NILFS_DIRSYNC_FL) + if (flags & FS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); @@ -382,9 +446,9 @@ int nilfs_read_inode_common(struct inode *inode, int err; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); - inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); + i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); @@ -402,7 +466,7 @@ int nilfs_read_inode_common(struct inode *inode, ii->i_dir_acl = S_ISREG(inode->i_mode) ? 0 : le32_to_cpu(raw_inode->i_dir_acl); #endif - ii->i_cno = 0; + ii->i_dir_start_lookup = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -416,27 +480,24 @@ int nilfs_read_inode_common(struct inode *inode, return 0; } -static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, +static int __nilfs_read_inode(struct super_block *sb, + struct nilfs_root *root, unsigned long ino, struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); + struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *bh; struct nilfs_inode *raw_inode; int err; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); + raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NILFS_ACL_NOT_CACHED; - ii->i_default_acl = NILFS_ACL_NOT_CACHED; -#endif - if (nilfs_read_inode_common(inode, raw_inode)) + err = nilfs_read_inode_common(inode, raw_inode); + if (err) goto failed_unmap; if (S_ISREG(inode->i_mode)) { @@ -454,35 +515,112 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, - new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); } - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); return 0; failed_unmap: - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); bad_inode: - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); return err; } -struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) +static int nilfs_iget_test(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + struct nilfs_inode_info *ii; + + if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) + return 0; + + ii = NILFS_I(inode); + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; + + return args->for_gc && args->cno == ii->i_cno; +} + +static int nilfs_iget_set(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + + inode->i_ino = args->ino; + if (args->for_gc) { + NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = NULL; + } else { + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + NILFS_I(inode)->i_root = args->root; + } + return 0; +} + +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +} + +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +} + +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) { struct inode *inode; int err; - inode = iget_locked(sb, ino); + inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; - err = __nilfs_read_inode(sb, ino, inode); + err = __nilfs_read_inode(sb, root, ino, inode); + if (unlikely(err)) { + iget_failed(inode); + return ERR_PTR(err); + } + unlock_new_inode(inode); + return inode; +} + +struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + __u64 cno) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + }; + struct inode *inode; + int err; + + inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = nilfs_init_gcinode(inode); if (unlikely(err)) { iget_failed(inode); return ERR_PTR(err); @@ -497,8 +635,8 @@ void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode_info *ii = NILFS_I(inode); raw_inode->i_mode = cpu_to_le16(inode->i_mode); - raw_inode->i_uid = cpu_to_le32(inode->i_uid); - raw_inode->i_gid = cpu_to_le32(inode->i_gid); + raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); + raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -510,11 +648,21 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + /* zero-fill unused portion in the case of super root block */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + } + if (has_bmap) nilfs_bmap_write(ii->i_bmap, raw_inode); else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) raw_inode->i_device_code = - cpu_to_le64(new_encode_dev(inode->i_rdev)); + cpu_to_le64(huge_encode_dev(inode->i_rdev)); /* When extending inode, nilfs->ns_inode_size should be checked for substitutions of appended fields */ } @@ -523,22 +671,20 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) { ino_t ino = inode->i_ino; struct nilfs_inode_info *ii = NILFS_I(inode); - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct inode *ifile = ii->i_root->ifile; struct nilfs_inode *raw_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); + raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); - /* The buffer is guarded with lock_buffer() by the caller */ if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) - memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); + memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); nilfs_write_inode_common(inode, raw_inode, 0); /* XXX: call with has_bmap = 0 is a workaround to avoid deadlock of bmap. This delays update of i_bmap to just before writing */ - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); + nilfs_ifile_unmap_inode(ifile, ino, ibh); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -551,7 +697,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; - repeat: +repeat: ret = nilfs_bmap_last_key(ii->i_bmap, &b); if (ret == -ENOENT) return; @@ -568,14 +714,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, nilfs_bmap_truncate(ii->i_bmap, b) == 0)) goto repeat; - failed: - if (ret == -EINVAL) - nilfs_error(ii->vfs_inode.i_sb, __func__, - "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); - else - nilfs_warning(ii->vfs_inode.i_sb, __func__, - "failed to truncate bmap (ino=%lu, err=%d)", - ii->vfs_inode.i_ino, ret); +failed: + nilfs_warning(ii->vfs_inode.i_sb, __func__, + "failed to truncate bmap (ino=%lu, err=%d)", + ii->vfs_inode.i_ino, ret); } void nilfs_truncate(struct inode *inode) @@ -603,32 +745,65 @@ void nilfs_truncate(struct inode *inode) if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); + nilfs_mark_inode_dirty(inode); + nilfs_set_file_dirty(inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But truncate has no return value. */ } -void nilfs_delete_inode(struct inode *inode) +static void nilfs_clear_inode(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + + /* + * Free resources allocated in nilfs_read_inode(), here. + */ + BUG_ON(!list_empty(&ii->i_dirty)); + brelse(ii->i_bh); + ii->i_bh = NULL; + + if (mdi && mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + + nilfs_btnode_cache_clear(&ii->i_btnode_cache); + + if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) + nilfs_put_root(ii->i_root); +} + +void nilfs_evict_inode(struct inode *inode) { struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); + int ret; - if (unlikely(is_bad_inode(inode))) { - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); + nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); + /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); - nilfs_free_inode(inode); - /* nilfs_free_inode() marks inode buffer dirty */ + nilfs_mark_inode_dirty(inode); + clear_inode(inode); + + ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); + if (!ret) + atomic64_dec(&ii->i_root->inodes_count); + + nilfs_clear_inode(inode); + if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_transaction_commit(sb); @@ -650,32 +825,54 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) err = nilfs_transaction_begin(sb, &ti, 0); if (unlikely(err)) return err; - err = inode_setattr(inode, iattr); - if (!err && (iattr->ia_valid & ATTR_MODE)) + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + truncate_setsize(inode, iattr->ia_size); + nilfs_truncate(inode); + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + + if (iattr->ia_valid & ATTR_MODE) { err = nilfs_acl_chmod(inode); - if (likely(!err)) - err = nilfs_transaction_commit(sb); - else - nilfs_transaction_abort(sb); + if (unlikely(err)) + goto out_err; + } + + return nilfs_transaction_commit(sb); +out_err: + nilfs_transaction_abort(sb); return err; } -int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, - struct buffer_head **pbh) +int nilfs_permission(struct inode *inode, int mask) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + if ((mask & MAY_WRITE) && root && + root->cno != NILFS_CPTREE_CURRENT_CNO) + return -EROFS; /* snapshot is not writable */ + + return generic_permission(inode, mask); +} + +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_inode_info *ii = NILFS_I(inode); int err; - spin_lock(&sbi->s_inode_lock); - /* Caller of this function MUST lock s_inode_lock */ + spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) { - spin_unlock(&sbi->s_inode_lock); - err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino, - pbh); + spin_unlock(&nilfs->ns_inode_lock); + err = nilfs_ifile_get_inode_block(ii->i_root->ifile, + inode->i_ino, pbh); if (unlikely(err)) return err; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) ii->i_bh = *pbh; else { @@ -686,36 +883,36 @@ int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, *pbh = ii->i_bh; get_bh(*pbh); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return 0; } int nilfs_inode_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; int ret = 0; if (!list_empty(&ii->i_dirty)) { - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || test_bit(NILFS_I_BUSY, &ii->i_state); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); } return ret; } -int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, - unsigned nr_dirty) +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) { struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); + atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) return 0; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { /* Because this routine may race with nilfs_dispose_list(), @@ -723,38 +920,34 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { /* This will happen when somebody is freeing this inode. */ - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(inode->i_sb, __func__, "cannot get inode (ino=%lu)\n", inode->i_ino); - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return -EINVAL; /* NILFS_I_DIRTY may remain for freeing inode */ } - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &sbi->s_dirty_files); + list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); set_bit(NILFS_I_QUEUED, &ii->i_state); } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return 0; } int nilfs_mark_inode_dirty(struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); struct buffer_head *ibh; int err; - err = nilfs_load_inode_block(sbi, inode, &ibh); + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warning(inode->i_sb, __func__, "failed to reget inode block.\n"); return err; } - lock_buffer(ibh); nilfs_update_inode(inode, ibh); - unlock_buffer(ibh); - nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); + mark_buffer_dirty(ibh); + nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; } @@ -769,9 +962,10 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { nilfs_warning(inode->i_sb, __func__, @@ -779,7 +973,142 @@ void nilfs_dirty_inode(struct inode *inode) dump_stack(); return; } + if (mdi) { + nilfs_mdt_mark_dirty(inode); + return; + } nilfs_transaction_begin(inode->i_sb, &ti, 0); nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ } + +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 logical = 0, phys = 0, size = 0; + __u32 flags = 0; + loff_t isize; + sector_t blkoff, end_blkoff; + sector_t delalloc_blkoff; + unsigned long delalloc_blklen; + unsigned int blkbits = inode->i_blkbits; + int ret, n; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + + blkoff = start >> blkbits; + end_blkoff = (start + len - 1) >> blkbits; + + delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, + &delalloc_blkoff); + + do { + __u64 blkphy; + unsigned int maxblocks; + + if (delalloc_blklen && blkoff == delalloc_blkoff) { + if (size) { + /* End of the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + } + if (blkoff > end_blkoff) + break; + + flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; + logical = blkoff << blkbits; + phys = 0; + size = delalloc_blklen << blkbits; + + blkoff = delalloc_blkoff + delalloc_blklen; + delalloc_blklen = nilfs_find_uncommitted_extent( + inode, blkoff, &delalloc_blkoff); + continue; + } + + /* + * Limit the number of blocks that we look up so as + * not to get into the next delayed allocation extent. + */ + maxblocks = INT_MAX; + if (delalloc_blklen) + maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, + maxblocks); + blkphy = 0; + + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + n = nilfs_bmap_lookup_contig( + NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + + if (n < 0) { + int past_eof; + + if (unlikely(n != -ENOENT)) + break; /* error */ + + /* HOLE */ + blkoff++; + past_eof = ((blkoff << blkbits) >= isize); + + if (size) { + /* End of the current extent */ + + if (past_eof) + flags |= FIEMAP_EXTENT_LAST; + + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + size = 0; + } + if (blkoff > end_blkoff || past_eof) + break; + } else { + if (size) { + if (phys && blkphy << blkbits == phys + size) { + /* The current extent goes on */ + size += n << blkbits; + } else { + /* Terminate the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, + flags); + if (ret || blkoff > end_blkoff) + break; + + /* Start another extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + } else { + /* Start a new extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + blkoff += n; + } + cond_resched(); + } while (true); + + /* If ret is 1 then we just hit the end of the extent array */ + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; +} diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index d6759b92006..422fb54b737 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -22,10 +22,13 @@ #include <linux/fs.h> #include <linux/wait.h> -#include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ +#include <linux/slab.h> #include <linux/capability.h> /* capable() */ #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ #include <linux/vmalloc.h> +#include <linux/compat.h> /* compat_ptr() */ +#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ +#include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> #include "nilfs.h" #include "segment.h" @@ -34,7 +37,26 @@ #include "sufile.h" #include "dat.h" - +/** + * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @dir: set of direction flags + * @dofunc: concrete function of get/set metadata info + * + * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of + * calling dofunc() function on the basis of @argv argument. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, @@ -54,6 +76,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_size > PAGE_SIZE) return -EINVAL; + /* + * Reject pairs of a start item position (argv->v_index) and a + * total count (argv->v_nmembs) which leads position 'pos' to + * overflow by the increment at the end of the loop. + */ + if (argv->v_index > ~(__u64)0 - argv->v_nmembs) + return -EINVAL; + buf = (void *)__get_free_pages(GFP_NOFS, 0); if (unlikely(!buf)) return -ENOMEM; @@ -96,54 +126,198 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, return ret; } +/** + * nilfs_ioctl_getflags - ioctl to support lsattr + */ +static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp) +{ + unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE; + + return put_user(flags, (int __user *)argp); +} + +/** + * nilfs_ioctl_setflags - ioctl to support chattr + */ +static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, + void __user *argp) +{ + struct nilfs_transaction_info ti; + unsigned int flags, oldflags; + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(flags, (int __user *)argp)) + return -EFAULT; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + flags = nilfs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = NILFS_I(inode)->i_flags; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by the + * relevant capability. + */ + ret = -EPERM; + if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + !capable(CAP_LINUX_IMMUTABLE)) + goto out; + + ret = nilfs_transaction_begin(inode->i_sb, &ti, 0); + if (ret) + goto out; + + NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) | + (flags & FS_FL_USER_MODIFIABLE); + + nilfs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + if (IS_SYNC(inode)) + nilfs_set_transaction_flag(NILFS_TI_SYNC); + + nilfs_mark_inode_dirty(inode); + ret = nilfs_transaction_commit(inode->i_sb); +out: + mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); + return ret; +} + +/** + * nilfs_ioctl_getversion - get info about a file's version (generation number) + */ +static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) +{ + return put_user(inode->i_generation, (int __user *)argp); +} + +/** + * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot) + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_change_cpmode() function changes mode of + * given checkpoint between checkpoint and snapshot state. This ioctl + * is used in chcp and mkcp utilities. + * + * Return Value: On success, 0 is returned and mode of a checkpoint is + * changed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint mode changing. + */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_transaction_info ti; struct nilfs_cpmode cpmode; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; if (copy_from_user(&cpmode, argp, sizeof(cpmode))) - return -EFAULT; + goto out; + + mutex_lock(&nilfs->ns_snapshot_mount_mutex); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( - cpfile, cpmode.cm_cno, cpmode.cm_mode); - if (unlikely(ret < 0)) { + nilfs->ns_cpfile, cpmode.cm_cno, cpmode.cm_mode); + if (unlikely(ret < 0)) nilfs_transaction_abort(inode->i_sb); - return ret; - } - nilfs_transaction_commit(inode->i_sb); /* never fails */ + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ + + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); +out: + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_delete_checkpoint - remove checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_delete_checkpoint() function removes + * checkpoint from NILFS2 file system. This ioctl is used in rmcp + * utility. + * + * Return Value: On success, 0 is returned and a checkpoint is + * removed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint removing. + */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_transaction_info ti; __u64 cno; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; if (copy_from_user(&cno, argp, sizeof(cno))) - return -EFAULT; + goto out; nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); - if (unlikely(ret < 0)) { + ret = nilfs_cpfile_delete_checkpoint(nilfs->ns_cpfile, cno); + if (unlikely(ret < 0)) nilfs_transaction_abort(inode->i_sb); - return ret; - } - nilfs_transaction_commit(inode->i_sb); /* never fails */ + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ +out: + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints + * @nilfs: nilfs object + * @posp: pointer on array of checkpoint's numbers + * @flags: checkpoint mode (checkpoint or snapshot) + * @buf: buffer for storing checkponts' info + * @size: size in bytes of one checkpoint info item in array + * @nmembs: number of checkpoints in array (numbers and infos) + * + * Description: nilfs_ioctl_do_get_cpinfo() function returns info about + * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in + * lscp utility and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_cpinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -152,15 +326,36 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); + size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } +/** + * nilfs_ioctl_get_cpstat - get checkpoints statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. + * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and checkpoints information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting checkpoints statistics. + */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_cpstat cpstat; int ret; @@ -175,6 +370,21 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info + * @nilfs: nilfs object + * @posp: pointer on array of segment numbers + * @flags: *not used* + * @buf: buffer for storing suinfo array + * @size: size in bytes of one suinfo item in array + * @nmembs: count of segment numbers and suinfos in array + * + * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage + * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used + * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_suinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -182,15 +392,37 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, size, + nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } +/** + * nilfs_ioctl_get_sustat - get segment usage statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. + * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and segment usage information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting segment usage statistics. + */ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_sustat sustat; int ret; @@ -205,6 +437,21 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_vinfo structures + * @size: size in bytes of one vinfo item in array + * @nmembs: count of vinfos in array + * + * Description: nilfs_ioctl_do_get_vinfo() function returns information + * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used + * by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_vinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -212,17 +459,31 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } +/** + * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_bdesc structures + * @size: size in bytes of one bdesc item in array + * @nmembs: count of bdescs in array + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_bdescs structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -244,10 +505,33 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, return nmembs; } +/** + * nilfs_ioctl_get_bdescs - get disk block descriptors + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and disk block descriptors are + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting disk block descriptors. + */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_argv argv; int ret; @@ -267,6 +551,26 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC + * @inode: inode object + * @vdesc: descriptor of virtual block number + * @buffers: list of moving buffers + * + * Description: nilfs_ioctl_move_inode_block() function registers data/node + * buffer in the GC pagecache and submit read request. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Requested block doesn't exist. + * + * %-EEXIST - Blocks conflict is detected. + */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, struct list_head *buffers) @@ -296,15 +600,40 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, (unsigned long long)vdesc->vd_vblocknr); return ret; } - bh->b_private = vdesc; + if (unlikely(!list_empty(&bh->b_assoc_buffers))) { + printk(KERN_CRIT "%s: conflicting %s buffer: ino=%llu, " + "cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu\n", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); + brelse(bh); + return -EEXIST; + } list_add_tail(&bh->b_assoc_buffers, buffers); return 0; } -static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, +/** + * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection + * @sb: superblock object + * @argv: vector of arguments from userspace + * @buf: array of nilfs_vdesc structures + * + * Description: nilfs_ioctl_move_blocks() function reads valid data/node + * blocks that garbage collector specified with the array of nilfs_vdesc + * structures and stores them into page caches of GC inodes. + * + * Return Value: Number of processed nilfs_vdesc structures or + * error code, otherwise. + */ +static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; + struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -316,42 +645,43 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, for (i = 0, vdesc = buf; i < nmembs; ) { ino = vdesc->vd_ino; cno = vdesc->vd_cno; - inode = nilfs_gc_iget(nilfs, ino, cno); - if (unlikely(inode == NULL)) { - ret = -ENOMEM; + inode = nilfs_iget_for_gc(sb, ino, cno); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; } + if (list_empty(&NILFS_I(inode)->i_dirty)) { + /* + * Add the inode to GC inode list. Garbage Collection + * is serialized and no two processes manipulate the + * list simultaneously. + */ + igrab(inode); + list_add(&NILFS_I(inode)->i_dirty, + &nilfs->ns_gc_inodes); + } + do { ret = nilfs_ioctl_move_inode_block(inode, vdesc, &buffers); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + iput(inode); goto failed; + } vdesc++; } while (++i < nmembs && vdesc->vd_ino == ino && vdesc->vd_cno == cno); + + iput(inode); /* The inode still remains in GC inode list */ } list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { ret = nilfs_gccache_wait_and_mark_dirty(bh); if (unlikely(ret < 0)) { - if (ret == -EEXIST) { - vdesc = bh->b_private; - printk(KERN_CRIT - "%s: conflicting %s buffer: " - "ino=%llu, cno=%llu, offset=%llu, " - "blocknr=%llu, vblocknr=%llu\n", - __func__, - vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); - } + WARN_ON(ret == -EEXIST); goto failed; } list_del_init(&bh->b_assoc_buffers); - bh->b_private = NULL; brelse(bh); } return nmembs; @@ -359,12 +689,30 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, failed: list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { list_del_init(&bh->b_assoc_buffers); - bh->b_private = NULL; brelse(bh); } return ret; } +/** + * nilfs_ioctl_delete_checkpoints - delete checkpoints + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of periods of checkpoints numbers + * + * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints + * in the period from p_start to p_end, excluding p_end itself. The checkpoints + * which have been already deleted are ignored. + * + * Return Value: Number of processed nilfs_period structures or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - invalid checkpoints. + */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -382,23 +730,58 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, return nmembs; } +/** + * nilfs_ioctl_free_vblocknrs - free virtual block numbers + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of virtual block numbers + * + * Description: nilfs_ioctl_free_vblocknrs() function frees + * the virtual block numbers specified by @buf and @argv->v_nmembs. + * + * Return Value: Number of processed virtual block numbers or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The virtual block number have not been allocated. + */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; int ret; - ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs); return (ret < 0) ? ret : nmembs; } +/** + * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of block descriptors + * + * Description: nilfs_ioctl_mark_blocks_dirty() function marks + * metadata file or data blocks as dirty. + * + * Return Value: Number of processed block descriptors or + * error code, otherwise. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-ENOENT - the specified block does not exist (hole block) + */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -417,7 +800,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, /* skip dead block */ continue; if (bdescs[i].bd_level == 0) { - ret = nilfs_mdt_mark_block_dirty(dat, + ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat, bdescs[i].bd_offset); if (ret < 0) { WARN_ON(ret == -ENOENT); @@ -435,36 +818,12 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, return nmembs; } -static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, void *buf) -{ - size_t nmembs = argv->v_nmembs; - struct nilfs_sb_info *sbi = nilfs->ns_writer; - int ret; - - if (unlikely(!sbi)) { - /* never happens because called for a writable mount */ - WARN_ON(1); - return -EROFS; - } - ret = nilfs_segctor_add_segments_to_be_freed( - NILFS_SC(sbi), buf, nmembs); - - return (ret < 0) ? ret : nmembs; -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, struct nilfs_argv *argv, void **kbufs) { const char *msg; int ret; - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); - if (ret < 0) { - msg = "cannot read source blocks"; - goto failed; - } - ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); if (ret < 0) { /* @@ -491,28 +850,33 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); - if (ret < 0) { - /* - * can safely abort because this operation is atomic. - */ - msg = "cannot set segments to be freed"; - goto failed; - } return 0; failed: - nilfs_remove_all_gcinode(nilfs); printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n", msg, ret); return ret; } +/** + * nilfs_ioctl_clean_segments - clean segments + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_clean_segments() function makes garbage + * collection operation in the environment of requested parameters + * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by + * nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { struct nilfs_argv argv[5]; - const static size_t argsz[5] = { + static const size_t argsz[5] = { sizeof(struct nilfs_vdesc), sizeof(struct nilfs_period), sizeof(__u64), @@ -528,12 +892,21 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; if (copy_from_user(argv, argp, sizeof(argv))) - return -EFAULT; + goto out; + ret = -EINVAL; nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) - return -EINVAL; + goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; + /* * argv[4] points to segment numbers this ioctl cleans. We * use kmalloc() for its buffer because memory used for the @@ -541,10 +914,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, */ kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, nsegs * sizeof(__u64)); - if (IS_ERR(kbufs[4])) - return PTR_ERR(kbufs[4]); - - nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + if (IS_ERR(kbufs[4])) { + ret = PTR_ERR(kbufs[4]); + goto out; + } + nilfs = inode->i_sb->s_fs_info; for (n = 0; n < 4; n++) { ret = -EINVAL; @@ -554,6 +928,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) goto out_free; + if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size) + goto out_free; + len = argv[n].v_size * argv[n].v_nmembs; base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { @@ -573,33 +950,234 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, } } - ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + /* + * nilfs_ioctl_move_blocks() will call nilfs_iget_for_gc(), + * which will operates an inode list without blocking. + * To protect the list from concurrent operations, + * nilfs_ioctl_move_blocks should be atomic operation. + */ + if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags)) { + ret = -EBUSY; + goto out_free; + } + + ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); + if (ret < 0) + printk(KERN_ERR "NILFS: GC failed during preparation: " + "cannot read source blocks: err=%d\n", ret); + else { + if (nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); + ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + } + + nilfs_remove_all_gcinodes(nilfs); + clear_nilfs_gc_running(nilfs); - out_free: +out_free: while (--n >= 0) vfree(kbufs[n]); kfree(kbufs[4]); +out: + mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_sync - make a checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_sync() function constructs a logical segment + * for checkpointing. This function guarantees that all modified data + * and metadata are written out to the device when it successfully + * returned. + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { __u64 cno; int ret; + struct the_nilfs *nilfs; ret = nilfs_construct_segment(inode->i_sb); if (ret < 0) return ret; + nilfs = inode->i_sb->s_fs_info; + if (nilfs_test_opt(nilfs, BARRIER)) { + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (ret == -EIO) + return ret; + } + if (argp != NULL) { - cno = NILFS_SB(inode->i_sb)->s_nilfs->ns_cno - 1; + down_read(&nilfs->ns_segctor_sem); + cno = nilfs->ns_cno - 1; + up_read(&nilfs->ns_segctor_sem); if (copy_to_user(argp, &cno, sizeof(cno))) return -EFAULT; } return 0; } +/** + * nilfs_ioctl_resize - resize NILFS2 volume + * @inode: inode object + * @filp: file object + * @argp: pointer on argument from userspace + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ +static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, + void __user *argp) +{ + __u64 newsize; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = mnt_want_write_file(filp); + if (ret) + goto out; + + ret = -EFAULT; + if (copy_from_user(&newsize, argp, sizeof(newsize))) + goto out_drop_write; + + ret = nilfs_resize_fs(inode->i_sb, newsize); + +out_drop_write: + mnt_drop_write_file(filp); +out: + return ret; +} + +/** + * nilfs_ioctl_trim_fs() - trim ioctl handle function + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Decription: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It + * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which + * performs the actual trim operation. + * + * Return Value: On success, 0 is returned or negative error code, otherwise. + */ +static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct request_queue *q = bdev_get_queue(nilfs->ns_bdev); + struct fstrim_range range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + + range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity); + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); + up_read(&nilfs->ns_segctor_sem); + + if (ret < 0) + return ret; + + if (copy_to_user(argp, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + +/** + * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Decription: nilfs_ioctl_set_alloc_range() function defines lower limit + * of segments in bytes and upper limit of segments in bytes. + * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ +static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 range[2]; + __u64 minseg, maxseg; + unsigned long segbytes; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EFAULT; + if (copy_from_user(range, argp, sizeof(__u64[2]))) + goto out; + + ret = -ERANGE; + if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + do_div(maxseg, segbytes); + maxseg--; + + ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); +out: + return ret; +} + +/** + * nilfs_ioctl_get_info - wrapping function of get metadata info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * @membsz: size of an item in bytes + * @dofunc: concrete function of getting metadata info + * + * Description: nilfs_ioctl_get_info() gets metadata info by means of + * calling dofunc() function. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -608,14 +1186,14 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, void *, size_t, size_t)) { - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_argv argv; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - if (argv.v_size != membsz) + if (argv.v_size < membsz) return -EINVAL; ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); @@ -627,12 +1205,107 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_set_suinfo - set segment usage info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: Expects an array of nilfs_suinfo_update structures + * encapsulated in nilfs_argv and updates the segment usage info + * according to the flags in nilfs_suinfo_update. + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EPERM - Not enough permissions + * + * %-EFAULT - Error copying input data + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + */ +static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct nilfs_transaction_info ti; + struct nilfs_argv argv; + size_t len; + void __user *base; + void *kbuf; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_from_user(&argv, argp, sizeof(argv))) + goto out; + + ret = -EINVAL; + if (argv.v_size < sizeof(struct nilfs_suinfo_update)) + goto out; + + if (argv.v_nmembs > nilfs->ns_nsegments) + goto out; + + if (argv.v_nmembs >= UINT_MAX / argv.v_size) + goto out; + + len = argv.v_size * argv.v_nmembs; + if (!len) { + ret = 0; + goto out; + } + + base = (void __user *)(unsigned long)argv.v_base; + kbuf = vmalloc(len); + if (!kbuf) { + ret = -ENOMEM; + goto out; + } + + if (copy_from_user(kbuf, base, len)) { + ret = -EFAULT; + goto out_free; + } + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, + argv.v_nmembs); + if (unlikely(ret < 0)) + nilfs_transaction_abort(inode->i_sb); + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ + +out_free: + vfree(kbuf); +out: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; - void __user *argp = (void * __user *)arg; + struct inode *inode = file_inode(filp); + void __user *argp = (void __user *)arg; switch (cmd) { + case FS_IOC_GETFLAGS: + return nilfs_ioctl_getflags(inode, argp); + case FS_IOC_SETFLAGS: + return nilfs_ioctl_setflags(inode, filp, argp); + case FS_IOC_GETVERSION: + return nilfs_ioctl_getversion(inode, argp); case NILFS_IOCTL_CHANGE_CPMODE: return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); case NILFS_IOCTL_DELETE_CHECKPOINT: @@ -647,6 +1320,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_get_info(inode, filp, cmd, argp, sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); + case NILFS_IOCTL_SET_SUINFO: + return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: @@ -659,7 +1334,48 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); + case NILFS_IOCTL_RESIZE: + return nilfs_ioctl_resize(inode, filp, argp); + case NILFS_IOCTL_SET_ALLOC_RANGE: + return nilfs_ioctl_set_alloc_range(inode, argp); + case FITRIM: + return nilfs_ioctl_trim_fs(inode, argp); default: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + case NILFS_IOCTL_CHANGE_CPMODE: + case NILFS_IOCTL_DELETE_CHECKPOINT: + case NILFS_IOCTL_GET_CPINFO: + case NILFS_IOCTL_GET_CPSTAT: + case NILFS_IOCTL_GET_SUINFO: + case NILFS_IOCTL_SET_SUINFO: + case NILFS_IOCTL_GET_SUSTAT: + case NILFS_IOCTL_GET_VINFO: + case NILFS_IOCTL_GET_BDESCS: + case NILFS_IOCTL_CLEAN_SEGMENTS: + case NILFS_IOCTL_SYNC: + case NILFS_IOCTL_RESIZE: + case NILFS_IOCTL_SET_ALLOC_RANGE: + case FITRIM: + break; + default: + return -ENOIOCTLCMD; + } + return nilfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index bb78745a0e3..c4dcd1db57e 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -26,7 +26,9 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/swap.h> +#include <linux/slab.h> #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" @@ -34,7 +36,6 @@ #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) -#define INIT_UNUSED_INODE_FIELDS static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, @@ -57,15 +58,15 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_buffer_uptodate(bh); - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); return 0; } @@ -76,25 +77,11 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; - if (!sb) { - /* - * Make sure this function is not called from any - * read-only context. - */ - if (!nilfs->ns_writer) { - WARN_ON(1); - err = -EROFS; - goto out; - } - sb = nilfs->ns_writer->s_super; - } - nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; @@ -103,17 +90,14 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, goto failed_unlock; err = -EEXIST; - if (buffer_uptodate(bh) || buffer_mapped(bh)) + if (buffer_uptodate(bh)) goto failed_bh; -#if 0 - /* The uptodate flag is not protected by the page lock, but - the mapped flag is. Thus, we don't have to wait the buffer. */ + wait_on_buffer(bh); if (buffer_uptodate(bh)) goto failed_bh; -#endif - bh->b_bdev = nilfs->ns_bdev; + bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -130,7 +114,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - out: + return err; } @@ -139,7 +123,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, int mode, struct buffer_head **out_bh) { struct buffer_head *bh; - unsigned long blknum = 0; + __u64 blknum = 0; int ret = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); @@ -162,17 +146,13 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, unlock_buffer(bh); goto out; } - if (!buffer_mapped(bh)) { /* unused buffer */ - ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, - &blknum); - if (unlikely(ret)) { - unlock_buffer(bh); - goto failed_bh; - } - bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; - bh->b_blocknr = blknum; - set_buffer_mapped(bh); + + ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); + if (unlikely(ret)) { + unlock_buffer(bh); + goto failed_bh; } + map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -191,7 +171,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, - struct buffer_head **out_bh) + int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; @@ -205,16 +185,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (unlikely(err)) goto failed; - blkoff = block + 1; - for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); - if (likely(!err || err == -EEXIST)) - brelse(bh); - else if (err != -EBUSY) - break; /* abort readahead if bmap lookup failed */ - - if (!buffer_locked(first_bh)) - goto out_no_wait; + if (readahead) { + blkoff = block + 1; + for (i = 0; i < nr_ra_blocks; i++, blkoff++) { + err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + if (likely(!err || err == -EEXIST)) + brelse(bh); + else if (err != -EBUSY) + break; + /* abort readahead if bmap lookup failed */ + if (!buffer_locked(first_bh)) + goto out_no_wait; + } } wait_on_buffer(first_bh); @@ -255,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * * %-ENOENT - the specified block does not exist (hole block) * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) - * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, @@ -268,7 +248,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: - ret = nilfs_mdt_read_block(inode, blkoff, out_bh); + ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; @@ -291,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -368,18 +346,16 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) { struct buffer_head *bh; int err; - err = nilfs_mdt_read_block(inode, block, &bh); + err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); brelse(bh); return 0; @@ -399,131 +375,67 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = container_of(page->mapping, - struct inode, i_data); - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *writer = NULL; + struct inode *inode = page->mapping->host; + struct super_block *sb; int err = 0; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); - if (page->mapping->assoc_mapping) - return 0; /* Do not request flush for shadow page cache */ - if (!sb) { - writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); - if (!writer) - return -EROFS; - sb = writer->s_super; - } + if (!inode) + return 0; + + sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); - if (writer) - nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); return err; } -static struct address_space_operations def_mdt_aops = { +static const struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, }; -static struct inode_operations def_mdt_iops; -static struct file_operations def_mdt_fops; +static const struct inode_operations def_mdt_iops; +static const struct file_operations def_mdt_fops; -/* - * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, - * ifile, or gcinodes. This allows the B-tree code and segment constructor - * to treat them like regular files, and this helps to simplify the - * implementation. - * On the other hand, some of the pseudo inodes have an irregular point: - * They don't have valid inode->i_sb pointer because their lifetimes are - * longer than those of the super block structs; they may continue for - * several consecutive mounts/umounts. This would need discussions. - */ -struct inode * -nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) -{ - struct inode *inode = nilfs_alloc_inode(sb); - if (!inode) - return NULL; - else { - struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); - - if (!mi) { - nilfs_destroy_inode(inode); - return NULL; - } - mi->mi_nilfs = nilfs; - init_rwsem(&mi->mi_sem); - - inode->i_sb = sb; /* sb may be NULL for some meta data files */ - inode->i_blkbits = nilfs->ns_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_nlink = 1; - inode->i_ino = ino; - inode->i_mode = S_IFREG; - inode->i_private = mi; - -#ifdef INIT_UNUSED_INODE_FIELDS - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; -#ifdef CONFIG_SECURITY - inode->i_security = NULL; -#endif - inode->dirtied_when = 0; - - INIT_LIST_HEAD(&inode->i_list); - INIT_LIST_HEAD(&inode->i_sb_list); - inode->i_state = 0; -#endif - - spin_lock_init(&inode->i_lock); - mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); - - mapping->host = NULL; /* instead of inode */ - mapping->flags = 0; - mapping_set_gfp_mask(mapping, gfp_mask); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = nilfs->ns_bdi; - - inode->i_mapping = mapping; - } +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) +{ + struct nilfs_mdt_info *mi; - return inode; -} + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); + if (!mi) + return -ENOMEM; -struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) -{ - struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); + init_rwsem(&mi->mi_sem); + inode->i_private = mi; - if (!inode) - return NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, gfp_mask); + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; - return inode; + + return 0; } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, @@ -536,29 +448,153 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } -void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) +/** + * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file + * @inode: inode of the metadata file + * @shadow: shadow mapping + */ +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) { - shadow->i_mapping->assoc_mapping = orig->i_mapping; - NILFS_I(shadow)->i_btnode_cache.assoc_mapping = - &NILFS_I(orig)->i_btnode_cache; + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct backing_dev_info *bdi = inode->i_sb->s_bdi; + + INIT_LIST_HEAD(&shadow->frozen_buffers); + address_space_init_once(&shadow->frozen_data); + nilfs_mapping_init(&shadow->frozen_data, inode, bdi); + address_space_init_once(&shadow->frozen_btnodes); + nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); + mi->mi_shadow = shadow; + return 0; } -void nilfs_mdt_clear(struct inode *inode) +/** + * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map + * @inode: inode of the metadata file + */ +int nilfs_mdt_save_to_shadow_map(struct inode *inode) { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + int ret; - invalidate_mapping_pages(inode->i_mapping, 0, -1); - truncate_inode_pages(inode->i_mapping, 0); + ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + if (ret) + goto out; - nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + &ii->i_btnode_cache); + if (ret) + goto out; + + nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); + out: + return ret; +} + +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) +{ + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen; + struct page *page; + int blkbits = inode->i_blkbits; + + page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + if (!page) + return -ENOMEM; + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << blkbits, 0); + + bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ + } + + unlock_page(page); + page_cache_release(page); + return 0; } -void nilfs_mdt_destroy(struct inode *inode) +struct buffer_head * +nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen = NULL; + struct page *page; + int n; + + page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; + bh_frozen = nilfs_page_get_nth_block(page, n); + } + unlock_page(page); + page_cache_release(page); + } + return bh_frozen; +} + +static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) +{ + struct list_head *head = &shadow->frozen_buffers; + struct buffer_head *bh; + + while (!list_empty(head)) { + bh = list_first_entry(head, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); /* drop ref-count to make it releasable */ + } +} + +/** + * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state + * @inode: inode of the metadata file + */ +void nilfs_mdt_restore_from_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + + down_write(&mi->mi_sem); + + if (mi->mi_palloc_cache) + nilfs_palloc_clear_cache(inode); + + nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); + nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + + up_write(&mi->mi_sem); +} + +/** + * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches + * @inode: inode of the metadata file + */ +void nilfs_mdt_clear_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; - kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ - kfree(mdi); - nilfs_destroy_inode(inode); + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); + truncate_inode_pages(&shadow->frozen_data, 0); + truncate_inode_pages(&shadow->frozen_btnodes, 0); + up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index df683e0bca6..ab172e8549c 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -29,23 +29,39 @@ #include "page.h" /** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state + * @frozen_data: shadowed dirty data pages + * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @frozen_buffers: list of frozen buffers + */ +struct nilfs_shadow_map { + struct nilfs_bmap_store bmap_store; + struct address_space frozen_data; + struct address_space frozen_btnodes; + struct list_head frozen_buffers; +}; + +/** * struct nilfs_mdt_info - on-memory private data of meta data files - * @mi_nilfs: back pointer to the_nilfs struct * @mi_sem: reader/writer semaphore for meta data operations * @mi_bgl: per-blockgroup locking * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block + * @mi_palloc_cache: persistent object allocator cache + * @mi_shadow: shadow of bmap and page caches * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ struct nilfs_mdt_info { - struct the_nilfs *mi_nilfs; struct rw_semaphore mi_sem; struct blockgroup_lock *mi_bgl; unsigned mi_entry_size; unsigned mi_first_entry_offset; unsigned long mi_entries_per_block; + struct nilfs_palloc_cache *mi_palloc_cache; + struct nilfs_shadow_map *mi_shadow; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; @@ -55,13 +71,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) return inode->i_private; } -static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; -} - /* Default GFP flags using highmem */ #define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) @@ -74,17 +83,17 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); -struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, - gfp_t); -struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, - ino_t, gfp_t); -void nilfs_mdt_destroy(struct inode *); -void nilfs_mdt_clear(struct inode *); +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); -void nilfs_mdt_set_shadow(struct inode *, struct inode *); - -#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow); +int nilfs_mdt_save_to_shadow_map(struct inode *inode); +void nilfs_mdt_restore_from_shadow_map(struct inode *inode); +void nilfs_mdt_clear_shadow_map(struct inode *inode); +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); +struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, + struct buffer_head *bh); static inline void nilfs_mdt_mark_dirty(struct inode *inode) { @@ -99,27 +108,10 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) static inline __u64 nilfs_mdt_cno(struct inode *inode) { - return NILFS_MDT(inode)->mi_nilfs->ns_cno; + return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; } #define nilfs_mdt_bgl_lock(inode, bg) \ (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) - -static inline int -nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - return nilfs_read_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n)); -} - -static inline void -nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - nilfs_write_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n), 1); -} - #endif /* _NILFS_MDT_H */ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index df70dadb336..9de78f08989 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -40,7 +40,11 @@ #include <linux/pagemap.h> #include "nilfs.h" +#include "export.h" +#define NILFS_FID_SIZE_NON_CONNECTABLE \ + (offsetof(struct nilfs_fid, parent_gen) / 4) +#define NILFS_FID_SIZE_CONNECTABLE (sizeof(struct nilfs_fid) / 4) static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) { @@ -59,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) */ static struct dentry * -nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; @@ -67,35 +71,11 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, dentry); - inode = NULL; - if (ino) { - inode = nilfs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } + ino = nilfs_inode_by_name(dir, &dentry->d_name); + inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; return d_splice_alias(inode, dentry); } -struct dentry *nilfs_get_parent(struct dentry *child) -{ - unsigned long ino; - struct inode *inode; - struct dentry dotdot; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - - ino = nilfs_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); - - inode = nilfs_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - return d_obtain_alias(inode); -} - /* * By the time this is called, we already have created * the directory cache entry for the new file, but it @@ -104,8 +84,8 @@ struct dentry *nilfs_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) { struct inode *inode; struct nilfs_transaction_info ti; @@ -120,7 +100,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -132,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, } static int -nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; struct nilfs_transaction_info ti; @@ -148,7 +128,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -188,7 +168,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, goto out_fail; /* mark_inode_dirty(inode); */ - /* nilfs_new_inode() and page_symlink() do this */ + /* page_symlink() do this */ err = nilfs_add_nondir(dentry, inode); out: @@ -200,7 +180,8 @@ out: return err; out_fail: - inode_dec_link_count(inode); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); iput(inode); goto out; } @@ -212,16 +193,13 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, struct nilfs_transaction_info ti; int err; - if (inode->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); err = nilfs_add_nondir(dentry, inode); if (!err) @@ -232,20 +210,17 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; struct nilfs_transaction_info ti; int err; - if (dir->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; - inode_inc_link_count(dir); + inc_nlink(dir); inode = nilfs_new_inode(dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -256,7 +231,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &nilfs_dir_operations; inode->i_mapping->a_ops = &nilfs_aops; - inode_inc_link_count(inode); + inc_nlink(inode); err = nilfs_make_empty(inode, dir); if (err) @@ -266,6 +241,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; + nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); out: if (!err) @@ -276,28 +252,25 @@ out: return err; out_fail: - inode_dec_link_count(inode); - inode_dec_link_count(inode); + drop_nlink(inode); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); iput(inode); out_dir: - inode_dec_link_count(dir); + drop_nlink(dir); + nilfs_mark_inode_dirty(dir); goto out; } -static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; struct page *page; - struct nilfs_transaction_info ti; int err; - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); - if (err) - return err; - err = -ENOENT; - de = nilfs_find_entry(dir, dentry, &page); + de = nilfs_find_entry(dir, &dentry->d_name, &page); if (!de) goto out; @@ -310,19 +283,35 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) nilfs_warning(inode->i_sb, __func__, "deleting nonexistent file (%lu), %d\n", inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; + set_nlink(inode, 1); } err = nilfs_delete_entry(de, page); if (err) goto out; inode->i_ctime = dir->i_ctime; - inode_dec_link_count(inode); + drop_nlink(inode); err = 0; out: - if (!err) + return err; +} + +static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct nilfs_transaction_info ti; + int err; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); + if (err) + return err; + + err = nilfs_do_unlink(dir, dentry); + + if (!err) { + nilfs_mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dentry->d_inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else nilfs_transaction_abort(dir->i_sb); return err; @@ -340,11 +329,13 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; if (nilfs_empty_dir(inode)) { - err = nilfs_unlink(dir, dentry); + err = nilfs_do_unlink(dir, dentry); if (!err) { inode->i_size = 0; - inode_dec_link_count(inode); - inode_dec_link_count(dir); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); + drop_nlink(dir); + nilfs_mark_inode_dirty(dir); } } if (!err) @@ -372,7 +363,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, return err; err = -ENOENT; - old_de = nilfs_find_entry(old_dir, old_dentry, &old_page); + old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; @@ -392,45 +383,40 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_dir; err = -ENOENT; - new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; - inode_inc_link_count(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); + nilfs_mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; if (dir_de) drop_nlink(new_inode); - inode_dec_link_count(new_inode); + drop_nlink(new_inode); + nilfs_mark_inode_dirty(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= NILFS_LINK_MAX) - goto out_dir; - } - inode_inc_link_count(old_inode); err = nilfs_add_link(new_dentry, old_inode); - if (err) { - inode_dec_link_count(old_inode); + if (err) goto out_dir; + if (dir_de) { + inc_nlink(new_dir); + nilfs_mark_inode_dirty(new_dir); } - if (dir_de) - inode_inc_link_count(new_dir); } /* * Like most other Unix systems, set the ctime for inodes on a * rename. - * inode_dec_link_count() will mark the inode dirty. */ old_inode->i_ctime = CURRENT_TIME; nilfs_delete_entry(old_de, old_page); - inode_dec_link_count(old_inode); if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - inode_dec_link_count(old_dir); + drop_nlink(old_dir); } + nilfs_mark_inode_dirty(old_dir); + nilfs_mark_inode_dirty(old_inode); err = nilfs_transaction_commit(old_dir->i_sb); return err; @@ -448,7 +434,114 @@ out: return err; } -struct inode_operations nilfs_dir_inode_operations = { +/* + * Export operations + */ +static struct dentry *nilfs_get_parent(struct dentry *child) +{ + unsigned long ino; + struct inode *inode; + struct qstr dotdot = QSTR_INIT("..", 2); + struct nilfs_root *root; + + ino = nilfs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + + root = NILFS_I(child->d_inode)->i_root; + + inode = nilfs_iget(child->d_inode->i_sb, root, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, + u64 ino, u32 gen) +{ + struct nilfs_root *root; + struct inode *inode; + + if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) + return ERR_PTR(-ESTALE); + + root = nilfs_lookup_root(sb->s_fs_info, cno); + if (!root) + return ERR_PTR(-ESTALE); + + inode = nilfs_iget(sb, root, ino); + nilfs_put_root(root); + + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (gen && inode->i_generation != gen) { + iput(inode); + return ERR_PTR(-ESTALE); + } + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && + fh_len != NILFS_FID_SIZE_CONNECTABLE) || + (fh_type != FILEID_NILFS_WITH_PARENT && + fh_type != FILEID_NILFS_WITHOUT_PARENT)) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->ino, fid->gen); +} + +static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if (fh_len != NILFS_FID_SIZE_CONNECTABLE || + fh_type != FILEID_NILFS_WITH_PARENT) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); +} + +static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + struct nilfs_root *root = NILFS_I(inode)->i_root; + int type; + + if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_CONNECTABLE; + return FILEID_INVALID; + } + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + return FILEID_INVALID; + } + + fid->cno = root->cno; + fid->ino = inode->i_ino; + fid->gen = inode->i_generation; + + if (parent) { + fid->parent_ino = parent->i_ino; + fid->parent_gen = parent->i_generation; + type = FILEID_NILFS_WITH_PARENT; + *lenp = NILFS_FID_SIZE_CONNECTABLE; + } else { + type = FILEID_NILFS_WITHOUT_PARENT; + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + } + + return type; +} + +const struct inode_operations nilfs_dir_inode_operations = { .create = nilfs_create, .lookup = nilfs_lookup, .link = nilfs_link, @@ -460,15 +553,24 @@ struct inode_operations nilfs_dir_inode_operations = { .rename = nilfs_rename, .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; -struct inode_operations nilfs_special_inode_operations = { +const struct inode_operations nilfs_special_inode_operations = { .setattr = nilfs_setattr, .permission = nilfs_permission, }; -struct inode_operations nilfs_symlink_inode_operations = { +const struct inode_operations nilfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .permission = nilfs_permission, +}; + +const struct export_operations nilfs_export_ops = { + .encode_fh = nilfs_encode_fh, + .fh_to_dentry = nilfs_fh_to_dentry, + .fh_to_parent = nilfs_fh_to_parent, + .get_parent = nilfs_get_parent, }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index da6fc0bba2e..9bc72dec3fa 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -30,18 +30,29 @@ #include <linux/blkdev.h> #include <linux/nilfs2_fs.h> #include "the_nilfs.h" -#include "sb.h" #include "bmap.h" -#include "bmap_union.h" -/* - * nilfs inode data in memory +/** + * struct nilfs_inode_info - nilfs inode data in memory + * @i_flags: inode flags + * @i_state: dynamic state flags + * @i_bmap: pointer on i_bmap_data + * @i_bmap_data: raw block mapping + * @i_xattr: <TODO> + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode + * @i_btnode_cache: cached pages of b-tree nodes + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode + * @i_root: root object of the current filesystem tree + * @vfs_inode: VFS inode object */ struct nilfs_inode_info { __u32 i_flags; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; - union nilfs_bmap_union i_bmap_union; + struct nilfs_bmap i_bmap_data; __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ @@ -58,12 +69,9 @@ struct nilfs_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_NILFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif struct buffer_head *i_bh; /* i_bh contains a new or dirty disk inode */ + struct nilfs_root *i_root; struct inode vfs_inode; }; @@ -75,9 +83,7 @@ static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode) static inline struct nilfs_inode_info * NILFS_BMAP_I(const struct nilfs_bmap *bmap) { - return container_of((union nilfs_bmap_union *)bmap, - struct nilfs_inode_info, - i_bmap_union); + return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) @@ -87,12 +93,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) return &ii->vfs_inode; } -static inline struct inode *NILFS_AS_I(struct address_space *mapping) -{ - return (mapping->host) ? : - container_of(mapping, struct inode, i_data); -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -107,26 +107,33 @@ enum { NILFS_I_INODE_DIRTY, /* write_inode is requested */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_GCDAT, /* shadow DAT, on memory only */ +}; + +/* + * commit flags for nilfs_commit_super and nilfs_sync_super + */ +enum { + NILFS_SB_COMMIT = 0, /* Commit a super block alternately */ + NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; /* * Macros to check inode numbers */ #define NILFS_MDT_INO_BITS \ - ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ - 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ - 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) + ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ + 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ + 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) #define NILFS_SYS_INO_BITS \ - ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) + ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) -#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) +#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) + ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) + ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) /** * struct nilfs_transaction_info: context information for synchronization @@ -189,22 +196,14 @@ static inline int nilfs_doing_construction(void) return nilfs_test_transaction_flag(NILFS_TI_WRITER); } -static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) -{ - return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; -} - /* * function prototype */ #ifdef CONFIG_NILFS_POSIX_ACL #error "NILFS: not yet supported POSIX ACL" -extern int nilfs_permission(struct inode *, int, struct nameidata *); extern int nilfs_acl_chmod(struct inode *); extern int nilfs_init_acl(struct inode *, struct inode *); #else -#define nilfs_permission NULL - static inline int nilfs_acl_chmod(struct inode *inode) { return 0; @@ -219,12 +218,29 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) #define NILFS_ATIME_DISABLE +/* Flags that should be inherited by new inodes from their parent. */ +#define NILFS_FL_INHERITED \ + (FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL | \ + FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\ + FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL); + else + return flags & (FS_NODUMP_FL | FS_NOATIME_FL); +} + /* dir.c */ extern int nilfs_add_link(struct dentry *, struct inode *); -extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *); +extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); extern int nilfs_make_empty(struct inode *, struct inode *); extern struct nilfs_dir_entry * -nilfs_find_entry(struct inode *, struct dentry *, struct page **); +nilfs_find_entry(struct inode *, const struct qstr *, struct page **); extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); extern int nilfs_empty_dir(struct inode *); extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); @@ -232,50 +248,68 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, struct page *, struct inode *); /* file.c */ -extern int nilfs_sync_file(struct file *, struct dentry *, int); +extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); +long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, void **); /* inode.c */ -extern struct inode *nilfs_new_inode(struct inode *, int); +void nilfs_inode_add_blocks(struct inode *inode, int n); +void nilfs_inode_sub_blocks(struct inode *inode, int n); +extern struct inode *nilfs_new_inode(struct inode *, umode_t); extern void nilfs_free_inode(struct inode *); extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); -extern struct inode *nilfs_iget(struct super_block *, unsigned long); +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); -extern void nilfs_delete_inode(struct inode *); +extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); -extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, - struct buffer_head **); +extern void nilfs_write_failed(struct address_space *mapping, loff_t to); +int nilfs_permission(struct inode *inode, int mask); +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); -extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, - unsigned); +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); - -/* namei.c */ -extern struct dentry *nilfs_get_parent(struct dentry *); +extern void nilfs_dirty_inode(struct inode *, int flags); +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); -extern void nilfs_error(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void nilfs_warning(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void nilfs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void nilfs_warning(struct super_block *, const char *, const char *, ...); extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, struct nilfs_super_block *, char *); -extern int nilfs_commit_super(struct nilfs_sb_info *, int); -extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); -extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); +extern int nilfs_check_feature_compatibility(struct super_block *, + struct nilfs_super_block *); +extern void nilfs_set_log_cursor(struct nilfs_super_block *, + struct the_nilfs *); +struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, + int flip); +int nilfs_commit_super(struct super_block *sb, int flag); +int nilfs_cleanup_super(struct super_block *sb); +int nilfs_resize_fs(struct super_block *sb, __u64 newsize); +int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, + struct nilfs_root **root); +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); /* gcinode.c */ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, @@ -283,27 +317,19 @@ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, struct buffer_head **); int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); -int nilfs_init_gccache(struct the_nilfs *); -void nilfs_destroy_gccache(struct the_nilfs *); -void nilfs_clear_gcinode(struct inode *); -struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); -void nilfs_remove_all_gcinode(struct the_nilfs *); - -/* gcdat.c */ -int nilfs_init_gcdat_inode(struct the_nilfs *); -void nilfs_commit_gcdat_inode(struct the_nilfs *); -void nilfs_clear_gcdat_inode(struct the_nilfs *); +int nilfs_init_gcinode(struct inode *inode); +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); /* * Inodes and files operations */ -extern struct file_operations nilfs_dir_operations; -extern struct inode_operations nilfs_file_inode_operations; -extern struct file_operations nilfs_file_operations; -extern struct address_space_operations nilfs_aops; -extern struct inode_operations nilfs_dir_inode_operations; -extern struct inode_operations nilfs_special_inode_operations; -extern struct inode_operations nilfs_symlink_inode_operations; +extern const struct file_operations nilfs_dir_operations; +extern const struct inode_operations nilfs_file_inode_operations; +extern const struct file_operations nilfs_file_operations; +extern const struct address_space_operations nilfs_aops; +extern const struct inode_operations nilfs_dir_inode_operations; +extern const struct inode_operations nilfs_special_inode_operations; +extern const struct inode_operations nilfs_symlink_inode_operations; /* * filesystem type diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index a2692bbc7b5..da276640f77 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -29,6 +29,7 @@ #include <linux/list.h> #include <linux/highmem.h> #include <linux/pagevec.h> +#include <linux/gfp.h> #include "nilfs.h" #include "page.h" #include "mdt.h" @@ -36,7 +37,7 @@ #define NILFS_BUFFER_INHERENT_BITS \ ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ - (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) + (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked)) static struct buffer_head * __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, @@ -57,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, return bh; } -/* - * Since the page cache of B-tree node pages or data page cache of pseudo - * inodes does not have a valid mapping->host pointer, calling - * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; - * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). - * To avoid this problem, the old style mark_buffer_dirty() is used instead. - */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh) -{ - if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) - __set_page_dirty_nobuffers(bh->b_page); -} - struct buffer_head *nilfs_grab_buffer(struct inode *inode, struct address_space *mapping, unsigned long blkoff, @@ -77,8 +65,8 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); - struct page *page, *opage; - struct buffer_head *bh, *obh; + struct page *page; + struct buffer_head *bh; page = grab_cache_page(mapping, index); if (unlikely(!page)) @@ -90,30 +78,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, page_cache_release(page); return NULL; } - if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { - /* - * Shadow page cache uses assoc_mapping to point its original - * page cache. The following code tries the original cache - * if the given cache is a shadow and it didn't hit. - */ - opage = find_lock_page(mapping->assoc_mapping, index); - if (!opage) - return bh; - - obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, - b_state); - if (buffer_uptodate(obh)) { - nilfs_copy_buffer(bh, obh); - if (buffer_dirty(obh)) { - nilfs_mark_buffer_dirty(bh); - if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) - nilfs_mdt_mark_dirty(inode); - } - } - brelse(obh); - unlock_page(opage); - page_cache_release(opage); - } return bh; } @@ -128,6 +92,9 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -153,11 +120,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) struct page *spage = sbh->b_page, *dpage = dbh->b_page; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage, KM_USER0); - kaddr1 = kmap_atomic(dpage, KM_USER1); + kaddr0 = kmap_atomic(spage); + kaddr1 = kmap_atomic(dpage); memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1, KM_USER1); - kunmap_atomic(kaddr0, KM_USER0); + kunmap_atomic(kaddr1); + kunmap_atomic(kaddr0); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; @@ -203,7 +170,7 @@ int nilfs_page_buffers_clean(struct page *page) void nilfs_page_bug(struct page *page) { struct address_space *m; - unsigned long ino = 0; + unsigned long ino; if (unlikely(!page)) { printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); @@ -211,11 +178,8 @@ void nilfs_page_bug(struct page *page) } m = page->mapping; - if (m) { - struct inode *inode = NILFS_AS_I(m); - if (inode != NULL) - ino = inode->i_ino; - } + ino = m ? m->host->i_ino : 0; + printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", page, atomic_read(&page->_count), @@ -237,62 +201,12 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_alloc_private_page - allocate a private page with buffer heads - * - * Return Value: On success, a pointer to the allocated page is returned. - * On error, NULL is returned. - */ -struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, - unsigned long state) -{ - struct buffer_head *bh, *head, *tail; - struct page *page; - - page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ - if (unlikely(!page)) - return NULL; - - lock_page(page); - head = alloc_page_buffers(page, size, 0); - if (unlikely(!head)) { - unlock_page(page); - __free_page(page); - return NULL; - } - - bh = head; - do { - bh->b_state = (1UL << BH_NILFS_Allocated) | state; - tail = bh; - bh->b_bdev = bdev; - bh = bh->b_this_page; - } while (bh); - - tail->b_this_page = head; - attach_page_buffers(page, head); - - return page; -} - -void nilfs_free_private_page(struct page *page) -{ - BUG_ON(!PageLocked(page)); - BUG_ON(page->mapping); - - if (page_has_buffers(page) && !try_to_free_buffers(page)) - NILFS_PAGE_BUG(page, "failed to free page"); - - unlock_page(page); - __free_page(page); -} - -/** * nilfs_copy_page -- copy the page with buffers * @dst: destination page * @src: source page * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. * - * This fuction is for both data pages and btnode pages. The dirty flag + * This function is for both data pages and btnode pages. The dirty flag * should be treated by caller. The page must not be under i/o. * Both src and dst page must be locked */ @@ -388,7 +302,7 @@ repeat: } /** - * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache + * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache * @dmap: destination page cache * @smap: source page cache * @@ -457,7 +371,12 @@ repeat: goto repeat; } -void nilfs_clear_dirty_pages(struct address_space *mapping) +/** + * nilfs_clear_dirty_pages - discard dirty pages in address space + * @mapping: address space with dirty pages for discarding + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) { struct pagevec pvec; unsigned int i; @@ -469,23 +388,9 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) PAGEVEC_SIZE)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - struct buffer_head *bh, *head; lock_page(page); - ClearPageUptodate(page); - ClearPageMappedToDisk(page); - bh = head = page_buffers(page); - do { - lock_buffer(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); - unlock_buffer(bh); - bh = bh->b_this_page; - } while (bh != head); - - __nilfs_clear_page_dirty(page); + nilfs_clear_dirty_page(page, silent); unlock_page(page); } pagevec_release(&pvec); @@ -493,6 +398,52 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) } } +/** + * nilfs_clear_dirty_page - discard dirty page + * @page: dirty page that will be discarded + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_page(struct page *page, bool silent) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + + BUG_ON(!PageLocked(page)); + + if (!silent) { + nilfs_warning(sb, __func__, + "discard page: offset %lld, ino %lu", + page_offset(page), inode->i_ino); + } + + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + lock_buffer(bh); + if (!silent) { + nilfs_warning(sb, __func__, + "discard block %llu, size %zu", + (u64)bh->b_blocknr, bh->b_size); + } + clear_buffer_async_write(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + unlock_buffer(bh); + } while (bh = bh->b_this_page, bh != head); + } + + __nilfs_clear_page_dirty(page); +} + unsigned nilfs_page_count_clean_buffers(struct page *page, unsigned from, unsigned to) { @@ -510,6 +461,17 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, return nc; } +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, + struct backing_dev_info *bdi) +{ + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->private_data = NULL; + mapping->backing_dev_info = bdi; + mapping->a_ops = &empty_aops; +} + /* * NILFS2 needs clear_page_dirty() in the following two cases: * @@ -539,3 +501,87 @@ int __nilfs_clear_page_dirty(struct page *page) } return TestClearPageDirty(page); } + +/** + * nilfs_find_uncommitted_extent - find extent of uncommitted data + * @inode: inode + * @start_blk: start block offset (in) + * @blkoff: start offset of the found extent (out) + * + * This function searches an extent of buffers marked "delayed" which + * starts from a block offset equal to or larger than @start_blk. If + * such an extent was found, this will store the start offset in + * @blkoff and return its length in blocks. Otherwise, zero is + * returned. + */ +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff) +{ + unsigned int i; + pgoff_t index; + unsigned int nblocks_in_page; + unsigned long length = 0; + sector_t b; + struct pagevec pvec; + struct page *page; + + if (inode->i_mapping->nrpages == 0) + return 0; + + index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + pagevec_init(&pvec, 0); + +repeat: + pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, + pvec.pages); + if (pvec.nr == 0) + return length; + + if (length > 0 && pvec.pages[0]->index > index) + goto out; + + b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + i = 0; + do { + page = pvec.pages[i]; + + lock_page(page); + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (b < start_blk) + continue; + if (buffer_delay(bh)) { + if (length == 0) + *blkoff = b; + length++; + } else if (length > 0) { + goto out_locked; + } + } while (++b, bh = bh->b_this_page, bh != head); + } else { + if (length > 0) + goto out_locked; + + b += nblocks_in_page; + } + unlock_page(page); + + } while (++i < pagevec_count(&pvec)); + + index = page->index + 1; + pagevec_release(&pvec); + cond_resched(); + goto repeat; + +out_locked: + unlock_page(page); +out: + pagevec_release(&pvec); + return length; +} diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 8abca4d1c1f..ef30c5c2426 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -34,14 +34,16 @@ enum { BH_NILFS_Allocated = BH_PrivateStart, BH_NILFS_Node, BH_NILFS_Volatile, + BH_NILFS_Checked, + BH_NILFS_Redirected, }; -BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ BUFFER_FNS(NILFS_Volatile, nilfs_volatile) +BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ +BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh); int __nilfs_clear_page_dirty(struct page *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, @@ -50,14 +52,17 @@ void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); int nilfs_page_buffers_clean(struct page *); void nilfs_page_bug(struct page *); -struct page *nilfs_alloc_private_page(struct block_device *, int, - unsigned long); -void nilfs_free_private_page(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_pages(struct address_space *); +void nilfs_clear_dirty_page(struct page *, bool); +void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, + struct backing_dev_info *bdi); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff); #define NILFS_PAGE_BUG(page, m, a...) \ do { nilfs_page_bug(page); BUG(); } while (0) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 57afa9d2406..ff00a0b7acb 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -23,12 +23,12 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/swap.h> +#include <linux/slab.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" #include "sufile.h" #include "page.h" -#include "seglist.h" #include "segbuf.h" /* @@ -40,7 +40,6 @@ enum { NILFS_SEG_FAIL_IO, NILFS_SEG_FAIL_MAGIC, NILFS_SEG_FAIL_SEQ, - NILFS_SEG_FAIL_CHECKSUM_SEGSUM, NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, NILFS_SEG_FAIL_CHECKSUM_FULL, NILFS_SEG_FAIL_CONSISTENCY, @@ -72,10 +71,6 @@ static int nilfs_warn_segment_error(int err) printk(KERN_WARNING "NILFS warning: Sequence number mismatch\n"); break; - case NILFS_SEG_FAIL_CHECKSUM_SEGSUM: - printk(KERN_WARNING - "NILFS warning: Checksum error in segment summary\n"); - break; case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT: printk(KERN_WARNING "NILFS warning: Checksum error in super root\n"); @@ -96,25 +91,9 @@ static int nilfs_warn_segment_error(int err) return -EINVAL; } -static void store_segsum_info(struct nilfs_segsum_info *ssi, - struct nilfs_segment_summary *sum, - unsigned int blocksize) -{ - ssi->flags = le16_to_cpu(sum->ss_flags); - ssi->seg_seq = le64_to_cpu(sum->ss_seq); - ssi->ctime = le64_to_cpu(sum->ss_create); - ssi->next = le64_to_cpu(sum->ss_next); - ssi->nblocks = le32_to_cpu(sum->ss_nblocks); - ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); - ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); - - ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); - ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); -} - /** - * calc_crc_cont - check CRC of blocks continuously - * @sbi: nilfs_sb_info + * nilfs_compute_checksum - compute checksum of blocks continuously + * @nilfs: nilfs object * @bhs: buffer head of start block * @sum: place to store result * @offset: offset bytes in the first block @@ -122,23 +101,25 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi, * @start: DBN of start block * @nblock: number of blocks to be checked */ -static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, - u32 *sum, unsigned long offset, u64 check_bytes, - sector_t start, unsigned long nblock) +static int nilfs_compute_checksum(struct the_nilfs *nilfs, + struct buffer_head *bhs, u32 *sum, + unsigned long offset, u64 check_bytes, + sector_t start, unsigned long nblock) { - unsigned long blocksize = sbi->s_super->s_blocksize; + unsigned int blocksize = nilfs->ns_blocksize; unsigned long size; u32 crc; BUG_ON(offset >= blocksize); check_bytes -= offset; size = min_t(u64, check_bytes, blocksize - offset); - crc = crc32_le(sbi->s_nilfs->ns_crc_seed, + crc = crc32_le(nilfs->ns_crc_seed, (unsigned char *)bhs->b_data + offset, size); if (--nblock > 0) { do { - struct buffer_head *bh - = sb_bread(sbi->s_super, ++start); + struct buffer_head *bh; + + bh = __bread(nilfs->ns_bdev, ++start, blocksize); if (!bh) return -EIO; check_bytes -= size; @@ -153,12 +134,12 @@ static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, /** * nilfs_read_super_root_block - read super root block - * @sb: super_block + * @nilfs: nilfs object * @sr_block: disk block number of the super root block * @pbh: address of a buffer_head pointer to return super root buffer * @check: CRC check flag */ -int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, +int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, struct buffer_head **pbh, int check) { struct buffer_head *bh_sr; @@ -167,7 +148,7 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, int ret; *pbh = NULL; - bh_sr = sb_bread(sb, sr_block); + bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize); if (unlikely(!bh_sr)) { ret = NILFS_SEG_FAIL_IO; goto failed; @@ -177,12 +158,13 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, if (check) { unsigned bytes = le16_to_cpu(sr->sr_bytes); - if (bytes == 0 || bytes > sb->s_blocksize) { + if (bytes == 0 || bytes > nilfs->ns_blocksize) { ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; goto failed_bh; } - if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, - sizeof(sr->sr_sum), bytes, sr_block, 1)) { + if (nilfs_compute_checksum( + nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes, + sr_block, 1)) { ret = NILFS_SEG_FAIL_IO; goto failed_bh; } @@ -202,78 +184,76 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, } /** - * load_segment_summary - read segment summary of the specified partial segment - * @sbi: nilfs_sb_info - * @pseg_start: start disk block number of partial segment - * @seg_seq: sequence number requested - * @ssi: pointer to nilfs_segsum_info struct to store information - * @full_check: full check flag - * (0: only checks segment summary CRC, 1: data CRC) + * nilfs_read_log_header - read summary header of the specified log + * @nilfs: nilfs object + * @start_blocknr: start block number of the log + * @sum: pointer to return segment summary structure */ -static int -load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, - u64 seg_seq, struct nilfs_segsum_info *ssi, - int full_check) +static struct buffer_head * +nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, + struct nilfs_segment_summary **sum) { struct buffer_head *bh_sum; - struct nilfs_segment_summary *sum; - unsigned long offset, nblock; - u64 check_bytes; - u32 crc, crc_sum; - int ret = NILFS_SEG_FAIL_IO; - bh_sum = sb_bread(sbi->s_super, pseg_start); - if (!bh_sum) - goto out; + bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); + if (bh_sum) + *sum = (struct nilfs_segment_summary *)bh_sum->b_data; + return bh_sum; +} + +/** + * nilfs_validate_log - verify consistency of log + * @nilfs: nilfs object + * @seg_seq: sequence number of segment + * @bh_sum: buffer head of summary block + * @sum: segment summary struct + */ +static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, + struct buffer_head *bh_sum, + struct nilfs_segment_summary *sum) +{ + unsigned long nblock; + u32 crc; + int ret; - sum = (struct nilfs_segment_summary *)bh_sum->b_data; + ret = NILFS_SEG_FAIL_MAGIC; + if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) + goto out; - /* Check consistency of segment summary */ - if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) { - ret = NILFS_SEG_FAIL_MAGIC; - goto failed; - } - store_segsum_info(ssi, sum, sbi->s_super->s_blocksize); - if (seg_seq != ssi->seg_seq) { - ret = NILFS_SEG_FAIL_SEQ; - goto failed; - } - if (full_check) { - offset = sizeof(sum->ss_datasum); - check_bytes = - ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits); - nblock = ssi->nblocks; - crc_sum = le32_to_cpu(sum->ss_datasum); - ret = NILFS_SEG_FAIL_CHECKSUM_FULL; - } else { /* only checks segment summary */ - offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum); - check_bytes = ssi->sumbytes; - nblock = ssi->nsumblk; - crc_sum = le32_to_cpu(sum->ss_sumsum); - ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM; - } + ret = NILFS_SEG_FAIL_SEQ; + if (le64_to_cpu(sum->ss_seq) != seg_seq) + goto out; - if (unlikely(nblock == 0 || - nblock > sbi->s_nilfs->ns_blocks_per_segment)) { + nblock = le32_to_cpu(sum->ss_nblocks); + ret = NILFS_SEG_FAIL_CONSISTENCY; + if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment)) /* This limits the number of blocks read in the CRC check */ - ret = NILFS_SEG_FAIL_CONSISTENCY; - goto failed; - } - if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes, - pseg_start, nblock)) { - ret = NILFS_SEG_FAIL_IO; - goto failed; - } - if (crc == crc_sum) - ret = 0; - failed: - brelse(bh_sum); - out: + goto out; + + ret = NILFS_SEG_FAIL_IO; + if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum), + ((u64)nblock << nilfs->ns_blocksize_bits), + bh_sum->b_blocknr, nblock)) + goto out; + + ret = NILFS_SEG_FAIL_CHECKSUM_FULL; + if (crc != le32_to_cpu(sum->ss_datasum)) + goto out; + ret = 0; +out: return ret; } -static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, - unsigned int *offset, unsigned int bytes) +/** + * nilfs_read_summary_info - read an item on summary blocks of a log + * @nilfs: nilfs object + * @pbh: the current buffer head on summary blocks [in, out] + * @offset: the current byte offset on summary blocks [in, out] + * @bytes: byte size of the item to be read + */ +static void *nilfs_read_summary_info(struct the_nilfs *nilfs, + struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes) { void *ptr; sector_t blocknr; @@ -282,7 +262,8 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, if (bytes > (*pbh)->b_size - *offset) { blocknr = (*pbh)->b_blocknr; brelse(*pbh); - *pbh = sb_bread(sb, blocknr + 1); + *pbh = __bread(nilfs->ns_bdev, blocknr + 1, + nilfs->ns_blocksize); if (unlikely(!*pbh)) return NULL; *offset = 0; @@ -292,9 +273,18 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, return ptr; } -static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, - unsigned int *offset, unsigned int bytes, - unsigned long count) +/** + * nilfs_skip_summary_info - skip items on summary blocks of a log + * @nilfs: nilfs object + * @pbh: the current buffer head on summary blocks [in, out] + * @offset: the current byte offset on summary blocks [in, out] + * @bytes: byte size of the item to be skipped + * @count: number of items to be skipped + */ +static void nilfs_skip_summary_info(struct the_nilfs *nilfs, + struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes, + unsigned long count) { unsigned int rest_item_in_current_block = ((*pbh)->b_size - *offset) / bytes; @@ -311,36 +301,46 @@ static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, *offset = bytes * (count - (bcnt - 1) * nitem_per_block); brelse(*pbh); - *pbh = sb_bread(sb, blocknr + bcnt); + *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt, + nilfs->ns_blocksize); } } -static int -collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, - struct nilfs_segsum_info *ssi, - struct list_head *head) +/** + * nilfs_scan_dsync_log - get block information of a log written for data sync + * @nilfs: nilfs object + * @start_blocknr: start block number of the log + * @sum: log summary information + * @head: list head to add nilfs_recovery_block struct + */ +static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, + struct nilfs_segment_summary *sum, + struct list_head *head) { struct buffer_head *bh; unsigned int offset; - unsigned long nfinfo = ssi->nfinfo; - sector_t blocknr = sum_blocknr + ssi->nsumblk; + u32 nfinfo, sumbytes; + sector_t blocknr; ino_t ino; int err = -EIO; + nfinfo = le32_to_cpu(sum->ss_nfinfo); if (!nfinfo) return 0; - bh = sb_bread(sbi->s_super, sum_blocknr); + sumbytes = le32_to_cpu(sum->ss_sumbytes); + blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize); + bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); if (unlikely(!bh)) goto out; - offset = le16_to_cpu( - ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes); + offset = le16_to_cpu(sum->ss_bytes); for (;;) { unsigned long nblocks, ndatablk, nnodeblk; struct nilfs_finfo *finfo; - finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo)); + finfo = nilfs_read_summary_info(nilfs, &bh, &offset, + sizeof(*finfo)); if (unlikely(!finfo)) goto out; @@ -353,8 +353,8 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, struct nilfs_recovery_block *rb; struct nilfs_binfo_v *binfo; - binfo = segsum_get(sbi->s_super, &bh, &offset, - sizeof(*binfo)); + binfo = nilfs_read_summary_info(nilfs, &bh, &offset, + sizeof(*binfo)); if (unlikely(!binfo)) goto out; @@ -372,9 +372,9 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, } if (--nfinfo == 0) break; - blocknr += nnodeblk; /* always 0 for the data sync segments */ - segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64), - nnodeblk); + blocknr += nnodeblk; /* always 0 for data sync logs */ + nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64), + nnodeblk); if (unlikely(!bh)) goto out; } @@ -387,27 +387,45 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, static void dispose_recovery_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_recovery_block *rb - = list_entry(head->next, - struct nilfs_recovery_block, list); + struct nilfs_recovery_block *rb; + + rb = list_first_entry(head, struct nilfs_recovery_block, list); list_del(&rb->list); kfree(rb); } } +struct nilfs_segment_entry { + struct list_head list; + __u64 segnum; +}; + +static int nilfs_segment_list_add(struct list_head *head, __u64 segnum) +{ + struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); + + if (unlikely(!ent)) + return -ENOMEM; + + ent->segnum = segnum; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, head); + return 0; +} + void nilfs_dispose_segment_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_segment_entry *ent - = list_entry(head->next, - struct nilfs_segment_entry, list); + struct nilfs_segment_entry *ent; + + ent = list_first_entry(head, struct nilfs_segment_entry, list); list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } } static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, struct nilfs_recovery_info *ri) { struct list_head *head = &ri->ri_used_segments; @@ -422,7 +440,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; - nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -431,12 +448,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, if (unlikely(err)) goto failed; - err = -ENOMEM; for (i = 1; i < 4; i++) { - ent = nilfs_alloc_segment_entry(segnum[i]); - if (unlikely(!ent)) + err = nilfs_segment_list_add(head, segnum[i]); + if (unlikely(err)) goto failed; - list_add_tail(&ent->list, head); } /* @@ -450,7 +465,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, goto failed; } list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } /* Allocate new segments for recovery */ @@ -464,41 +479,42 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, failed: /* No need to recover sufile because it will be destroyed on error */ - nilfs_detach_writer(nilfs, sbi); return err; } -static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, +static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, struct page *page) { struct buffer_head *bh_org; void *kaddr; - bh_org = sb_bread(sbi->s_super, rb->blocknr); + bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh_org); return 0; } -static int recover_dsync_blocks(struct nilfs_sb_info *sbi, - struct list_head *head, - unsigned long *nr_salvaged_blocks) +static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, + struct super_block *sb, + struct nilfs_root *root, + struct list_head *head, + unsigned long *nr_salvaged_blocks) { struct inode *inode; struct nilfs_recovery_block *rb, *n; - unsigned blocksize = sbi->s_super->s_blocksize; + unsigned blocksize = nilfs->ns_blocksize; struct page *page; loff_t pos; int err = 0, err2 = 0; list_for_each_entry_safe(rb, n, head, list) { - inode = nilfs_iget(sbi->s_super, rb->ino); + inode = nilfs_iget(sb, root, rb->ino); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; @@ -506,17 +522,21 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, } pos = rb->blkoff << inode->i_blkbits; - page = NULL; - err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, - 0, &page, NULL, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(inode->i_mapping, pos, blocksize, + 0, &page, nilfs_get_block); + if (unlikely(err)) { + loff_t isize = inode->i_size; + if (pos + blocksize > isize) + nilfs_write_failed(inode->i_mapping, + pos + blocksize); goto failed_inode; + } - err = nilfs_recovery_copy_block(sbi, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, page); if (unlikely(err)) goto failed_page; - err = nilfs_set_file_dirty(sbi, inode, 1); + err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) goto failed_page; @@ -537,7 +557,8 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, printk(KERN_WARNING "NILFS warning: error recovering data block " "(err=%d, ino=%lu, block-offset=%llu)\n", - err, rb->ino, (unsigned long long)rb->blkoff); + err, (unsigned long)rb->ino, + (unsigned long long)rb->blkoff); if (!err2) err2 = err; next: @@ -551,18 +572,21 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, /** * nilfs_do_roll_forward - salvage logical segments newer than the latest * checkpoint - * @sbi: nilfs_sb_info - * @nilfs: the_nilfs + * @nilfs: nilfs object + * @sb: super block instance * @ri: pointer to a nilfs_recovery_info */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, + struct super_block *sb, + struct nilfs_root *root, struct nilfs_recovery_info *ri) { - struct nilfs_segsum_info ssi; + struct buffer_head *bh_sum = NULL; + struct nilfs_segment_summary *sum; sector_t pseg_start; sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ unsigned long nsalvaged_blocks = 0; + unsigned int flags; u64 seg_seq; __u64 segnum, nextnum = 0; int empty_seg = 0; @@ -574,15 +598,20 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, }; int state = RF_INIT_ST; - nilfs_attach_writer(nilfs, sbi); pseg_start = ri->ri_lsegs_start; seg_seq = ri->ri_lsegs_start_seq; segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { + brelse(bh_sum); + bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); + if (!bh_sum) { + err = -EIO; + goto failed; + } - ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); + ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); if (ret) { if (ret == NILFS_SEG_FAIL_IO) { err = -EIO; @@ -590,33 +619,38 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } goto strayed; } - if (unlikely(NILFS_SEG_HAS_SR(&ssi))) + + flags = le16_to_cpu(sum->ss_flags); + if (flags & NILFS_SS_SR) goto confused; /* Found a valid partial segment; do recovery actions */ - nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + nextnum = nilfs_get_segnum_of_block(nilfs, + le64_to_cpu(sum->ss_next)); empty_seg = 0; - nilfs->ns_ctime = ssi.ctime; - if (!(ssi.flags & NILFS_SS_GC)) - nilfs->ns_nongc_ctime = ssi.ctime; + nilfs->ns_ctime = le64_to_cpu(sum->ss_create); + if (!(flags & NILFS_SS_GC)) + nilfs->ns_nongc_ctime = nilfs->ns_ctime; switch (state) { case RF_INIT_ST: - if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) + if (!(flags & NILFS_SS_LOGBGN) || + !(flags & NILFS_SS_SYNDT)) goto try_next_pseg; state = RF_DSYNC_ST; /* Fall through */ case RF_DSYNC_ST: - if (!NILFS_SEG_DSYNC(&ssi)) + if (!(flags & NILFS_SS_SYNDT)) goto confused; - err = collect_blocks_from_segsum( - sbi, pseg_start, &ssi, &dsync_blocks); + err = nilfs_scan_dsync_log(nilfs, pseg_start, sum, + &dsync_blocks); if (unlikely(err)) goto failed; - if (NILFS_SEG_LOGEND(&ssi)) { - err = recover_dsync_blocks( - sbi, &dsync_blocks, &nsalvaged_blocks); + if (flags & NILFS_SS_LOGEND) { + err = nilfs_recover_dsync_blocks( + nilfs, sb, root, &dsync_blocks, + &nsalvaged_blocks); if (unlikely(err)) goto failed; state = RF_INIT_ST; @@ -627,7 +661,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, try_next_pseg: if (pseg_start == ri->ri_lsegs_end) break; - pseg_start += ssi.nblocks; + pseg_start += le32_to_cpu(sum->ss_nblocks); if (pseg_start < seg_end) continue; goto feed_segment; @@ -648,12 +682,12 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, if (nsalvaged_blocks) { printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", - sbi->s_super->s_id, nsalvaged_blocks); + sb->s_id, nsalvaged_blocks); ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: + brelse(bh_sum); dispose_recovery_list(&dsync_blocks); - nilfs_detach_writer(sbi->s_nilfs, sbi); return err; confused: @@ -662,12 +696,11 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, printk(KERN_ERR "NILFS (device %s): Error roll-forwarding " "(err=%d, pseg block=%llu). ", - sbi->s_super->s_id, err, (unsigned long long)pseg_start); + sb->s_id, err, (unsigned long long)pseg_start); goto out; } static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { struct buffer_head *bh; @@ -677,7 +710,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) return; - bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); + bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); BUG_ON(!bh); memset(bh->b_data, 0, bh->b_size); set_buffer_dirty(bh); @@ -690,10 +723,9 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, } /** - * nilfs_recover_logical_segments - salvage logical segments written after - * the latest super root - * @nilfs: the_nilfs - * @sbi: nilfs_sb_info + * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint + * @nilfs: nilfs object + * @sb: super block instance * @ri: pointer to a nilfs_recovery_info struct to store search results. * * Return Value: On success, 0 is returned. On error, one of the following @@ -709,41 +741,42 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, * * %-ENOMEM - Insufficient memory available. */ -int nilfs_recover_logical_segments(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, - struct nilfs_recovery_info *ri) +int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, + struct super_block *sb, + struct nilfs_recovery_info *ri) { + struct nilfs_root *root; int err; if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) return 0; - err = nilfs_attach_checkpoint(sbi, ri->ri_cno); + err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading the latest checkpoint.\n"); return err; } - err = nilfs_do_roll_forward(nilfs, sbi, ri); + err = nilfs_do_roll_forward(nilfs, sb, root, ri); if (unlikely(err)) goto failed; if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { - err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); + err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: Error preparing segments for " "recovery.\n"); goto failed; } - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_log_writer(sb, root); if (unlikely(err)) goto failed; set_nilfs_discontinued(nilfs); - err = nilfs_construct_segment(sbi->s_super); - nilfs_detach_segment_constructor(sbi); + err = nilfs_construct_segment(sb); + nilfs_detach_log_writer(sb); if (unlikely(err)) { printk(KERN_ERR "NILFS: Oops! recovery failed. " @@ -751,24 +784,17 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; } - nilfs_finish_roll_forward(nilfs, sbi, ri); + nilfs_finish_roll_forward(nilfs, ri); } - nilfs_detach_checkpoint(sbi); - return 0; - failed: - nilfs_detach_checkpoint(sbi); - nilfs_mdt_clear(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_dat); + nilfs_put_root(root); return err; } /** * nilfs_search_super_root - search the latest valid super root * @nilfs: the_nilfs - * @sbi: nilfs_sb_info * @ri: pointer to a nilfs_recovery_info struct to store search results. * * nilfs_search_super_root() looks for the latest super-root from a partial @@ -781,17 +807,22 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, * %-EINVAL - No valid segment found * * %-EIO - I/O error + * + * %-ENOMEM - Insufficient memory available. */ -int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, +int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { - struct nilfs_segsum_info ssi; + struct buffer_head *bh_sum = NULL; + struct nilfs_segment_summary *sum; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ + sector_t b, end; + unsigned long nblocks; + unsigned int flags; u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; - struct nilfs_segment_entry *ent; LIST_HEAD(segments); int empty_seg = 0, scan_newer = 0; int ret; @@ -804,15 +835,27 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, /* Calculate range of segment */ nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + /* Read ahead segment */ + b = seg_start; + while (b <= seg_end) + __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize); + for (;;) { - /* Load segment summary */ - ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); + brelse(bh_sum); + ret = NILFS_SEG_FAIL_IO; + bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); + if (!bh_sum) + goto failed; + + ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); if (ret) { if (ret == NILFS_SEG_FAIL_IO) goto failed; goto strayed; } - pseg_end = pseg_start + ssi.nblocks - 1; + + nblocks = le32_to_cpu(sum->ss_nblocks); + pseg_end = pseg_start + nblocks - 1; if (unlikely(pseg_end > seg_end)) { ret = NILFS_SEG_FAIL_CONSISTENCY; goto strayed; @@ -822,23 +865,32 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_pseg_start = pseg_start; ri->ri_seq = seg_seq; ri->ri_segnum = segnum; - nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + nextnum = nilfs_get_segnum_of_block(nilfs, + le64_to_cpu(sum->ss_next)); ri->ri_nextnum = nextnum; empty_seg = 0; - if (!NILFS_SEG_HAS_SR(&ssi)) { - if (!scan_newer) { - /* This will never happen because a superblock - (last_segment) always points to a pseg - having a super root. */ - ret = NILFS_SEG_FAIL_CONSISTENCY; - goto failed; - } - if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { + flags = le16_to_cpu(sum->ss_flags); + if (!(flags & NILFS_SS_SR) && !scan_newer) { + /* This will never happen because a superblock + (last_segment) always points to a pseg + having a super root. */ + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto failed; + } + + if (pseg_start == seg_start) { + nilfs_get_segment_range(nilfs, nextnum, &b, &end); + while (b <= end) + __breadahead(nilfs->ns_bdev, b++, + nilfs->ns_blocksize); + } + if (!(flags & NILFS_SS_SR)) { + if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) { ri->ri_lsegs_start = pseg_start; ri->ri_lsegs_start_seq = seg_seq; } - if (NILFS_SEG_LOGEND(&ssi)) + if (flags & NILFS_SS_LOGEND) ri->ri_lsegs_end = pseg_start; goto try_next_pseg; } @@ -849,12 +901,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_lsegs_start = ri->ri_lsegs_end = 0; nilfs_dispose_segment_list(&segments); - nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) - + ssi.nblocks - seg_start; + sr_pseg_start = pseg_start; + nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start; nilfs->ns_seg_seq = seg_seq; nilfs->ns_segnum = segnum; nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ - nilfs->ns_ctime = ssi.ctime; + nilfs->ns_ctime = le64_to_cpu(sum->ss_create); nilfs->ns_nextnum = nextnum; if (scan_newer) @@ -865,15 +917,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, scan_newer = 1; } - /* reset region for roll-forward */ - pseg_start += ssi.nblocks; - if (pseg_start < seg_end) - continue; - goto feed_segment; - try_next_pseg: /* Standing on a course, or met an inconsistent state */ - pseg_start += ssi.nblocks; + pseg_start += nblocks; if (pseg_start < seg_end) continue; goto feed_segment; @@ -892,12 +938,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (empty_seg++) goto super_root_found; /* found a valid super root */ - ent = nilfs_alloc_segment_entry(segnum); - if (unlikely(!ent)) { - ret = -ENOMEM; + ret = nilfs_segment_list_add(&segments, segnum); + if (unlikely(ret)) goto failed; - } - list_add_tail(&ent->list, &segments); seg_seq++; segnum = nextnum; @@ -907,13 +950,15 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, super_root_found: /* Updating pointers relating to the latest checkpoint */ - list_splice(&segments, ri->ri_used_segments.prev); + brelse(bh_sum); + list_splice_tail(&segments, &ri->ri_used_segments); nilfs->ns_last_pseg = sr_pseg_start; nilfs->ns_last_seq = nilfs->ns_seg_seq; nilfs->ns_last_cno = ri->ri_cno; return 0; failed: + brelse(bh_sum); nilfs_dispose_segment_list(&segments); return (ret < 0) ? ret : nilfs_warn_segment_error(ret); } diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h deleted file mode 100644 index adccd4fc654..00000000000 --- a/fs/nilfs2/sb.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * sb.h - NILFS on-memory super block structure. - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> - * - */ - -#ifndef _NILFS_SB -#define _NILFS_SB - -#include <linux/types.h> -#include <linux/fs.h> - -/* - * Mount options - */ -struct nilfs_mount_options { - unsigned long mount_opt; - __u64 snapshot_cno; -}; - -struct the_nilfs; -struct nilfs_sc_info; - -/* - * NILFS super-block data in memory - */ -struct nilfs_sb_info { - /* Snapshot status */ - __u64 s_snapshot_cno; /* Checkpoint number */ - atomic_t s_inodes_count; - atomic_t s_blocks_count; /* Reserved (might be deleted) */ - - /* Mount options */ - unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; - - unsigned long s_interval; /* construction interval */ - unsigned long s_watermark; /* threshold of data amount - for the segment construction */ - - /* Fundamental members */ - struct super_block *s_super; /* reverse pointer to super_block */ - struct the_nilfs *s_nilfs; - struct list_head s_list; /* list head for nilfs->ns_supers */ - - /* Segment constructor */ - struct list_head s_dirty_files; /* dirty files list */ - struct nilfs_sc_info *s_sc_info; /* segment constructor info */ - spinlock_t s_inode_lock; /* Lock for the nilfs inode. - It covers s_dirty_files list */ - - /* Metadata files */ - struct inode *s_ifile; /* index file inode */ - - /* Inode allocator */ - spinlock_t s_next_gen_lock; - u32 s_next_generation; -}; - -static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi) -{ - return sbi->s_sc_info; -} - -/* - * Bit operations for the mount option - */ -#define nilfs_clear_opt(sbi, opt) \ - do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) -#define nilfs_set_opt(sbi, opt) \ - do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0) -#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(sbi, mask, opt) \ - do { (sbi)->s_mount_opt = \ - (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt); \ - } while (0) - -#endif /* _NILFS_SB */ diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1e68821b4a9..dc3a9efdaab 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -24,33 +24,25 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/crc32.h> +#include <linux/backing-dev.h> +#include <linux/slab.h> #include "page.h" #include "segbuf.h" -#include "seglist.h" -static struct kmem_cache *nilfs_segbuf_cachep; +struct nilfs_write_info { + struct the_nilfs *nilfs; + struct bio *bio; + int start, end; /* The region to be submitted */ + int rest_blocks; + int max_pages; + int nr_vecs; + sector_t blocknr; +}; -static void nilfs_segbuf_init_once(void *obj) -{ - memset(obj, 0, sizeof(struct nilfs_segment_buffer)); -} - -int __init nilfs_init_segbuf_cache(void) -{ - nilfs_segbuf_cachep = - kmem_cache_create("nilfs2_segbuf_cache", - sizeof(struct nilfs_segment_buffer), - 0, SLAB_RECLAIM_ACCOUNT, - nilfs_segbuf_init_once); - - return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0; -} - -void nilfs_destroy_segbuf_cache(void) -{ - kmem_cache_destroy(nilfs_segbuf_cachep); -} +static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, + struct the_nilfs *nilfs); +static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) { @@ -64,6 +56,12 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); + segbuf->sb_super_root = NULL; + + init_completion(&segbuf->sb_bio_event); + atomic_set(&segbuf->sb_err, 0); + segbuf->sb_nbio = 0; + return segbuf; } @@ -84,6 +82,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; } +/** + * nilfs_segbuf_map_cont - map a new log behind a given log + * @segbuf: new segment buffer + * @prev: segment buffer containing a log to be continued + */ +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev) +{ + segbuf->sb_segnum = prev->sb_segnum; + segbuf->sb_fseg_start = prev->sb_fseg_start; + segbuf->sb_fseg_end = prev->sb_fseg_end; + segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks; + segbuf->sb_rest_blocks = + segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; +} + void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, __u64 nextnum, struct the_nilfs *nilfs) { @@ -120,7 +134,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, } int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, - time_t ctime) + time_t ctime, __u64 cno) { int err; @@ -133,13 +147,12 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; segbuf->sb_sum.ctime = ctime; - - segbuf->sb_io_error = 0; + segbuf->sb_sum.cno = cno; return 0; } /* - * Setup segument summary + * Setup segment summary */ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf) { @@ -160,13 +173,14 @@ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf) raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); raw_sum->ss_pad = 0; + raw_sum->ss_cno = cpu_to_le64(segbuf->sb_sum.cno); } /* * CRC calculation routines */ -void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, - u32 seed) +static void +nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; @@ -193,8 +207,8 @@ void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, raw_sum->ss_sumsum = cpu_to_le32(crc); } -void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, - u32 seed) +static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, + u32 seed) { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; @@ -213,65 +227,145 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } raw_sum->ss_datasum = cpu_to_le32(crc); } -void nilfs_release_buffers(struct list_head *list) +static void +nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, + u32 seed) +{ + struct nilfs_super_root *raw_sr; + struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; + unsigned srsize; + u32 crc; + + raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; + srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); + crc = crc32_le(seed, + (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), + srsize - sizeof(raw_sr->sr_sum)); + raw_sr->sr_sum = cpu_to_le32(crc); +} + +static void nilfs_release_buffers(struct list_head *list) { struct buffer_head *bh, *n; list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { list_del_init(&bh->b_assoc_buffers); - if (buffer_nilfs_allocated(bh)) { - struct page *clone_page = bh->b_page; - - /* remove clone page */ - brelse(bh); - page_cache_release(clone_page); /* for each bh */ - if (page_count(clone_page) <= 2) { - lock_page(clone_page); - nilfs_free_private_page(clone_page); - } - continue; - } brelse(bh); } } +static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +{ + nilfs_release_buffers(&segbuf->sb_segsum_buffers); + nilfs_release_buffers(&segbuf->sb_payload_buffers); + segbuf->sb_super_root = NULL; +} + +/* + * Iterators for segment buffers + */ +void nilfs_clear_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, logs, sb_list) + nilfs_segbuf_clear(segbuf); +} + +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last) +{ + struct nilfs_segment_buffer *n, *segbuf; + + segbuf = list_prepare_entry(last, logs, sb_list); + list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) { + list_del_init(&segbuf->sb_list); + nilfs_segbuf_clear(segbuf); + nilfs_segbuf_free(segbuf); + } +} + +int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf; + int ret = 0; + + list_for_each_entry(segbuf, logs, sb_list) { + ret = nilfs_segbuf_write(segbuf, nilfs); + if (ret) + break; + } + return ret; +} + +int nilfs_wait_on_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + int err, ret = 0; + + list_for_each_entry(segbuf, logs, sb_list) { + err = nilfs_segbuf_wait(segbuf); + if (err && !ret) + ret = err; + } + return ret; +} + +/** + * nilfs_add_checksums_on_logs - add checksums on the logs + * @logs: list of segment buffers storing target logs + * @seed: checksum seed value + */ +void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, logs, sb_list) { + if (segbuf->sb_super_root) + nilfs_segbuf_fill_in_super_root_crc(segbuf, seed); + nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); + nilfs_segbuf_fill_in_data_crc(segbuf, seed); + } +} + /* * BIO operations */ static void nilfs_end_bio_write(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nilfs_write_info *wi = bio->bi_private; + struct nilfs_segment_buffer *segbuf = bio->bi_private; if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) - atomic_inc(&wi->err); + atomic_inc(&segbuf->sb_err); bio_put(bio); - complete(&wi->bio_event); + complete(&segbuf->sb_bio_event); } -static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) +static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, int mode) { struct bio *bio = wi->bio; int err; - if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { - wait_for_completion(&wi->bio_event); - wi->nbio--; - if (unlikely(atomic_read(&wi->err))) { + if (segbuf->sb_nbio > 0 && + bdi_write_congested(segbuf->sb_super->s_bdi)) { + wait_for_completion(&segbuf->sb_bio_event); + segbuf->sb_nbio--; + if (unlikely(atomic_read(&segbuf->sb_err))) { bio_put(bio); err = -EIO; goto failed; @@ -279,15 +373,15 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) } bio->bi_end_io = nilfs_end_bio_write; - bio->bi_private = wi; + bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - wi->nbio++; bio_put(bio); wi->bio = NULL; @@ -302,57 +396,53 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) } /** - * nilfs_alloc_seg_bio - allocate a bio for writing segment. - * @sb: super block - * @start: beginning disk block number of this BIO. + * nilfs_alloc_seg_bio - allocate a new bio for writing log + * @nilfs: nilfs object + * @start: start block number of the bio * @nr_vecs: request size of page vector. * - * alloc_seg_bio() allocates a new BIO structure and initialize it. - * * Return Value: On success, pointer to the struct bio is returned. * On error, NULL is returned. */ -static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, +static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, int nr_vecs) { struct bio *bio; - bio = bio_alloc(GFP_NOWAIT, nr_vecs); + bio = bio_alloc(GFP_NOIO, nr_vecs); if (bio == NULL) { while (!bio && (nr_vecs >>= 1)) - bio = bio_alloc(GFP_NOWAIT, nr_vecs); + bio = bio_alloc(GFP_NOIO, nr_vecs); } if (likely(bio)) { - bio->bi_bdev = sb->s_bdev; - bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); + bio->bi_bdev = nilfs->ns_bdev; + bio->bi_iter.bi_sector = + start << (nilfs->ns_blocksize_bits - 9); } return bio; } -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); + wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev); wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; - wi->nbio = 0; wi->blocknr = segbuf->sb_pseg_start; - - atomic_set(&wi->err, 0); - init_completion(&wi->bio_event); } -static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, - int mode) +static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, + struct buffer_head *bh, int mode) { int len, err; BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, + wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end, wi->nr_vecs); if (unlikely(!wi->bio)) return -ENOMEM; @@ -364,76 +454,83 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, return 0; } /* bio is FULL */ - err = nilfs_submit_seg_bio(wi, mode); + err = nilfs_segbuf_submit_bio(segbuf, wi, mode); /* never submit current bh */ if (likely(!err)) goto repeat; return err; } -int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +/** + * nilfs_segbuf_write - submit write requests of a log + * @segbuf: buffer storing a log to be written + * @nilfs: nilfs object + * + * Return Value: On Success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error + * + * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, + struct the_nilfs *nilfs) { + struct nilfs_write_info wi; struct buffer_head *bh; - int res, rw = WRITE; + int res = 0, rw = WRITE; + + wi.nilfs = nilfs; + nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } - if (wi->bio) { + if (wi.bio) { /* * Last BIO is always sent through the following * submission. */ - rw |= (1 << BIO_RW_SYNCIO); - res = nilfs_submit_seg_bio(wi, rw); - if (unlikely(res)) - goto failed_bio; + rw |= REQ_SYNC; + res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } - res = 0; - out: - return res; - failed_bio: - atomic_inc(&wi->err); - goto out; + return res; } /** * nilfs_segbuf_wait - wait for completion of requested BIOs - * @wi: nilfs_write_info + * @segbuf: segment buffer * * Return Value: On Success, 0 is returned. On Error, one of the following * negative error code is returned. * * %-EIO - I/O error */ -int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { int err = 0; - if (!wi->nbio) + if (!segbuf->sb_nbio) return 0; do { - wait_for_completion(&wi->bio_event); - } while (--wi->nbio > 0); + wait_for_completion(&segbuf->sb_bio_event); + } while (--segbuf->sb_nbio > 0); - if (unlikely(atomic_read(&wi->err) > 0)) { + if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { printk(KERN_ERR "NILFS: IO error writing segment\n"); err = -EIO; - segbuf->sb_io_error = 1; } return err; } diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 0c3076f4e59..b04f08cc239 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -27,7 +27,6 @@ #include <linux/buffer_head.h> #include <linux/bio.h> #include <linux/completion.h> -#include <linux/backing-dev.h> /** * struct nilfs_segsum_info - On-memory segment summary @@ -38,6 +37,7 @@ * @sumbytes: Byte count of segment summary * @nfileblk: Total number of file blocks * @seg_seq: Segment sequence number + * @cno: Checkpoint number * @ctime: Creation time * @next: Block number of the next full segment */ @@ -49,21 +49,11 @@ struct nilfs_segsum_info { unsigned long sumbytes; unsigned long nfileblk; u64 seg_seq; + __u64 cno; time_t ctime; sector_t next; }; -/* macro for the flags */ -#define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR) -#define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN) -#define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND) -#define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT) -#define NILFS_SEG_SIMPLEX(sum) \ - (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \ - (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) - -#define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk) - /** * struct nilfs_segment_buffer - Segment buffer * @sb_super: back pointer to a superblock struct @@ -77,7 +67,10 @@ struct nilfs_segsum_info { * @sb_rest_blocks: Number of residual blocks in the current segment * @sb_segsum_buffers: List of buffers for segment summaries * @sb_payload_buffers: List of buffers for segment payload - * @sb_io_error: I/O error status + * @sb_super_root: Pointer to buffer storing a super root block (if exists) + * @sb_nbio: Number of flying bio requests + * @sb_err: I/O error status + * @sb_bio_event: Completion event of log writing */ struct nilfs_segment_buffer { struct super_block *sb_super; @@ -94,9 +87,12 @@ struct nilfs_segment_buffer { /* Buffers */ struct list_head sb_segsum_buffers; struct list_head sb_payload_buffers; /* including super root */ + struct buffer_head *sb_super_root; /* io status */ - int sb_io_error; + int sb_nbio; + atomic_t sb_err; + struct completion sb_bio_event; }; #define NILFS_LIST_SEGBUF(head) \ @@ -118,22 +114,34 @@ struct nilfs_segment_buffer { b_assoc_buffers)) #define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) +extern struct kmem_cache *nilfs_segbuf_cachep; -int __init nilfs_init_segbuf_cache(void); -void nilfs_destroy_segbuf_cache(void); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, struct the_nilfs *); +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); -int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); +int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64); int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, struct buffer_head **); void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); -void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32); -void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32); + +static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf) +{ + unsigned int flags = segbuf->sb_sum.flags; + + return (flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == + (NILFS_SS_LOGBGN | NILFS_SS_LOGEND); +} + +static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf) +{ + return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk; +} static inline void nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, @@ -161,41 +169,16 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf, segbuf->sb_sum.nfileblk++; } -void nilfs_release_buffers(struct list_head *); +void nilfs_clear_logs(struct list_head *logs); +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last); +int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs); +int nilfs_wait_on_logs(struct list_head *logs); +void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed); -static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +static inline void nilfs_destroy_logs(struct list_head *logs) { - nilfs_release_buffers(&segbuf->sb_segsum_buffers); - nilfs_release_buffers(&segbuf->sb_payload_buffers); + nilfs_truncate_logs(logs, NULL); } -struct nilfs_write_info { - struct bio *bio; - int start, end; /* The region to be submitted */ - int rest_blocks; - int max_pages; - int nr_vecs; - sector_t blocknr; - - int nbio; - atomic_t err; - struct completion bio_event; - /* completion event of segment write */ - - /* - * The following fields must be set explicitly - */ - struct super_block *sb; - struct backing_dev_info *bdi; /* backing dev info */ - struct buffer_head *bh_sr; -}; - - -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); -int nilfs_segbuf_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); -int nilfs_segbuf_wait(struct nilfs_segment_buffer *, - struct nilfs_write_info *); - #endif /* _NILFS_SEGBUF_H */ diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h deleted file mode 100644 index d39df9144e9..00000000000 --- a/fs/nilfs2/seglist.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * seglist.h - expediential structure and routines to handle list of segments - * (would be removed in a future release) - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> - * - */ -#ifndef _NILFS_SEGLIST_H -#define _NILFS_SEGLIST_H - -#include <linux/fs.h> -#include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> -#include "sufile.h" - -struct nilfs_segment_entry { - __u64 segnum; - -#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally. - It must be cancelled if - construction aborted */ - - unsigned flags; - struct list_head list; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; -}; - - -void nilfs_dispose_segment_list(struct list_head *); - -static inline struct nilfs_segment_entry * -nilfs_alloc_segment_entry(__u64 segnum) -{ - struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); - - if (likely(ent)) { - ent->segnum = segnum; - ent->flags = 0; - ent->bh_su = NULL; - ent->raw_su = NULL; - INIT_LIST_HEAD(&ent->list); - } - return ent; -} - -static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - return nilfs_sufile_get_segment_usage(sufile, ent->segnum, - &ent->raw_su, &ent->bh_su); -} - -static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - if (!ent->bh_su) - return; - nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); - ent->bh_su = NULL; - ent->raw_su = NULL; -} - -static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent) -{ - kfree(ent); -} - -#endif /* _NILFS_SEGLIST_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 22c7f65c240..a1a191634ab 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -32,6 +32,7 @@ #include <linux/kthread.h> #include <linux/crc32.h> #include <linux/pagevec.h> +#include <linux/slab.h> #include "nilfs.h" #include "btnode.h" #include "page.h" @@ -39,7 +40,6 @@ #include "sufile.h" #include "cpfile.h" #include "ifile.h" -#include "seglist.h" #include "segbuf.h" @@ -79,7 +79,8 @@ enum { /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ -#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) +#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */ +#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED) /* Operations depending on the construction mode and file type */ struct nilfs_sc_operations { @@ -103,8 +104,7 @@ struct nilfs_sc_operations { static void nilfs_segctor_start_timer(struct nilfs_sc_info *); static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); -static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, - int); +static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_gt(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ @@ -115,42 +115,6 @@ static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, #define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) #define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) -/* - * Transaction - */ -static struct kmem_cache *nilfs_transaction_cachep; - -/** - * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info - * - * nilfs_init_transaction_cache() creates a slab cache for the struct - * nilfs_transaction_info. - * - * Return Value: On success, it returns 0. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - */ -int nilfs_init_transaction_cache(void) -{ - nilfs_transaction_cachep = - kmem_cache_create("nilfs2_transaction_cache", - sizeof(struct nilfs_transaction_info), - 0, SLAB_RECLAIM_ACCOUNT, NULL); - return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0; -} - -/** - * nilfs_detroy_transaction_cache - destroy the cache for transaction info - * - * nilfs_destroy_transaction_cache() frees the slab cache for the struct - * nilfs_transaction_info. - */ -void nilfs_destroy_transaction_cache(void) -{ - kmem_cache_destroy(nilfs_transaction_cachep); -} - static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) { struct nilfs_transaction_info *cur_ti = current->journal_info; @@ -201,7 +165,7 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * This function allocates a nilfs_transaction_info struct to keep context * information on it. It is initialized and hooked onto the current task in * the outermost call. If a pre-allocated struct is given to @ti, it is used - * instead; othewise a new struct is assigned from a slab. + * instead; otherwise a new struct is assigned from a slab. * * When @vacancy_check flag is set, this function will check the amount of * free space, and will wait for the GC to reclaim disk space if low capacity. @@ -217,7 +181,6 @@ int nilfs_transaction_begin(struct super_block *sb, struct nilfs_transaction_info *ti, int vacancy_check) { - struct nilfs_sb_info *sbi; struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); @@ -226,8 +189,9 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; - sbi = NILFS_SB(sb); - nilfs = sbi->s_nilfs; + sb_start_intwrite(sb); + + nilfs = sb->s_fs_info; down_read(&nilfs->ns_segctor_sem); if (vacancy_check && nilfs_near_disk_full(nilfs)) { up_read(&nilfs->ns_segctor_sem); @@ -241,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb, current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return ret; } @@ -258,8 +223,7 @@ int nilfs_transaction_begin(struct super_block *sb, int nilfs_transaction_commit(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; - struct nilfs_sb_info *sbi; - struct nilfs_sc_info *sci; + struct the_nilfs *nilfs = sb->s_fs_info; int err = 0; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); @@ -268,46 +232,47 @@ int nilfs_transaction_commit(struct super_block *sb) ti->ti_count--; return 0; } - sbi = NILFS_SB(sb); - sci = NILFS_SC(sbi); - if (sci != NULL) { + if (nilfs->ns_writer) { + struct nilfs_sc_info *sci = nilfs->ns_writer; + if (ti->ti_flags & NILFS_TI_COMMIT) nilfs_segctor_start_timer(sci); - if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > - sci->sc_watermark) + if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark) nilfs_segctor_do_flush(sci, 0); } - up_read(&sbi->s_nilfs->ns_segctor_sem); + up_read(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_SYNC) err = nilfs_construct_segment(sb); if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return err; } void nilfs_transaction_abort(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); if (ti->ti_count > 0) { ti->ti_count--; return; } - up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); + up_read(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); } void nilfs_relax_pressure_in_lock(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; if (!sci || !sci->sc_flush_request) return; @@ -327,11 +292,13 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) downgrade_write(&nilfs->ns_segctor_sem); } -static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, +static void nilfs_transaction_lock(struct super_block *sb, struct nilfs_transaction_info *ti, int gcflag) { struct nilfs_transaction_info *cur_ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; WARN_ON(cur_ti); ti->ti_flags = NILFS_TI_WRITER; @@ -342,30 +309,31 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, current->journal_info = ti; for (;;) { - down_write(&sbi->s_nilfs->ns_segctor_sem); - if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) + down_write(&nilfs->ns_segctor_sem); + if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) break; - nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); + nilfs_segctor_do_immediate_flush(sci); - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); yield(); } if (gcflag) ti->ti_flags |= NILFS_TI_GC; } -static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) +static void nilfs_transaction_unlock(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; + struct the_nilfs *nilfs = sb->s_fs_info; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); BUG_ON(ti->ti_count > 0); - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(sbi, &ti->ti_garbage, 0); + nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -401,7 +369,7 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) if (nilfs_doing_gc()) flags = NILFS_SS_GC; - err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime); + err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno); if (unlikely(err)) return err; @@ -434,7 +402,7 @@ static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci) return err; segbuf = sci->sc_curseg; } - err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root); + err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root); if (likely(!err)) segbuf->sb_sum.flags |= NILFS_SS_SR; return err; @@ -463,7 +431,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, nilfs_segctor_map_segsum_entry( sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); - if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + if (NILFS_I(inode)->i_root && + !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); /* skip finfo */ } @@ -474,17 +443,26 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, struct nilfs_finfo *finfo; struct nilfs_inode_info *ii; struct nilfs_segment_buffer *segbuf; + __u64 cno; if (sci->sc_blk_cnt == 0) return; ii = NILFS_I(inode); + + if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + cno = ii->i_cno; + else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) + cno = 0; + else + cno = sci->sc_cno; + finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, sizeof(*finfo)); finfo->fi_ino = cpu_to_le64(inode->i_ino); finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); - finfo->fi_cno = cpu_to_le64(ii->i_cno); + finfo->fi_cno = cpu_to_le64(cno); segbuf = sci->sc_curseg; segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + @@ -528,17 +506,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, return err; } -static int nilfs_handle_bmap_error(int err, const char *fname, - struct inode *inode, struct super_block *sb) -{ - if (err == -EINVAL) { - nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; - } - return err; -} - /* * Callback functions that enumerate, mark, and collect dirty blocks */ @@ -548,9 +515,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_v)); @@ -563,13 +529,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - int err; - - err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); - return 0; + return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); } static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, @@ -598,7 +558,7 @@ static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci, *vblocknr = binfo->bi_v.bi_vblocknr; } -struct nilfs_sc_operations nilfs_sc_file_ops = { +static struct nilfs_sc_operations nilfs_sc_file_ops = { .collect_data = nilfs_collect_file_data, .collect_node = nilfs_collect_file_node, .collect_bmap = nilfs_collect_file_bmap, @@ -612,9 +572,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); if (!err) @@ -648,7 +607,7 @@ static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci, *binfo_dat = binfo->bi_dat; } -struct nilfs_sc_operations nilfs_sc_dat_ops = { +static struct nilfs_sc_operations nilfs_sc_dat_ops = { .collect_data = nilfs_collect_dat_data, .collect_node = nilfs_collect_file_node, .collect_bmap = nilfs_collect_dat_bmap, @@ -656,7 +615,7 @@ struct nilfs_sc_operations nilfs_sc_dat_ops = { .write_node_binfo = nilfs_write_dat_node_binfo, }; -struct nilfs_sc_operations nilfs_sc_dsync_ops = { +static struct nilfs_sc_operations nilfs_sc_dsync_ops = { .collect_data = nilfs_collect_file_data, .collect_node = NULL, .collect_bmap = NULL, @@ -699,17 +658,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, if (unlikely(page->index > last)) break; - if (mapping->host) { - lock_page(page); - if (!page_has_buffers(page)) - create_empty_buffers(page, - 1 << inode->i_blkbits, 0); - unlock_page(page); - } + lock_page(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + unlock_page(page); bh = head = page_buffers(page); do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -743,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -756,7 +713,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, } } -static void nilfs_dispose_list(struct nilfs_sb_info *sbi, +static void nilfs_dispose_list(struct the_nilfs *nilfs, struct list_head *head, int force) { struct nilfs_inode_info *ii, *n; @@ -764,7 +721,7 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, unsigned nv = 0; while (!list_empty(head)) { - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, head, i_dirty) { list_del_init(&ii->i_dirty); if (force) { @@ -775,34 +732,33 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { set_bit(NILFS_I_QUEUED, &ii->i_state); list_add_tail(&ii->i_dirty, - &sbi->s_dirty_files); + &nilfs->ns_dirty_files); continue; } ivec[nv++] = ii; if (nv == SC_N_INODEVEC) break; } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); for (pii = ivec; nv > 0; pii++, nv--) iput(&(*pii)->vfs_inode); } } -static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) +static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, + struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; int ret = 0; - if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) + if (nilfs_mdt_fetch_dirty(root->ifile)) ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) ret++; - if (ret || nilfs_doing_gc()) - if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) - ret++; + if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat)) + ret++; return ret; } @@ -810,40 +766,39 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci) { return list_empty(&sci->sc_dirty_files) && !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && - list_empty(&sci->sc_cleaning_segments) && + sci->sc_nfreesegs == 0 && (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); } static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int ret = 0; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); - spin_lock(&sbi->s_inode_lock); - if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) + spin_lock(&nilfs->ns_inode_lock); + if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci)) ret++; - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); return ret; } static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - nilfs_mdt_clear_dirty(sbi->s_ifile); + nilfs_mdt_clear_dirty(sci->sc_root->ifile); nilfs_mdt_clear_dirty(nilfs->ns_cpfile); nilfs_mdt_clear_dirty(nilfs->ns_sufile); - nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); + nilfs_mdt_clear_dirty(nilfs->ns_dat); } static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) { - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct buffer_head *bh_cp; struct nilfs_checkpoint *raw_cp; int err; @@ -855,7 +810,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) /* The following code is duplicated with cpfile. But, it is needed to collect the checkpoint even if it was not newly created */ - nilfs_mdt_mark_buffer_dirty(bh_cp); + mark_buffer_dirty(bh_cp); nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); @@ -867,8 +822,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct buffer_head *bh_cp; struct nilfs_checkpoint *raw_cp; int err; @@ -882,9 +836,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sbi->s_inodes_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sbi->s_blocks_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); @@ -895,7 +849,8 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) else nilfs_checkpoint_set_minor(raw_cp); - nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); + nilfs_write_inode_common(sci->sc_root->ifile, + &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -920,66 +875,41 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, } } -static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, - struct inode *ifile) +static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) { struct nilfs_inode_info *ii; list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { - nilfs_fill_in_file_bmap(ifile, ii); + nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii); set_bit(NILFS_I_COLLECTED, &ii->i_state); } } -/* - * CRC calculation routines - */ -static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) -{ - struct nilfs_super_root *raw_sr = - (struct nilfs_super_root *)bh_sr->b_data; - u32 crc; - - crc = crc32_le(seed, - (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), - NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); - raw_sr->sr_sum = cpu_to_le32(crc); -} - -static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci, - u32 seed) -{ - struct nilfs_segment_buffer *segbuf; - - if (sci->sc_super_root) - nilfs_fill_in_super_root_crc(sci->sc_super_root, seed); - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); - nilfs_segbuf_fill_in_data_crc(segbuf, seed); - } -} - static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct buffer_head *bh_sr = sci->sc_super_root; - struct nilfs_super_root *raw_sr = - (struct nilfs_super_root *)bh_sr->b_data; - unsigned isz = nilfs->ns_inode_size; + struct buffer_head *bh_sr; + struct nilfs_super_root *raw_sr; + unsigned isz, srsz; - raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; + raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + isz = nilfs->ns_inode_size; + srsz = NILFS_SR_BYTES(isz); + + raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_mdt_write_inode_direct( - nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); + nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz), 1); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); } static void nilfs_redirty_inodes(struct list_head *head) @@ -1005,44 +935,6 @@ static void nilfs_drop_collected_inodes(struct list_head *head) } } -static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) - -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - if (!(ent->flags & NILFS_SLH_FREED)) - break; - err = nilfs_sufile_cancel_free(sufile, ent->segnum); - WARN_ON(err); /* do not happen */ - ent->flags &= ~NILFS_SLH_FREED; - } -} - -static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - err = nilfs_sufile_free(sufile, ent->segnum); - if (unlikely(err)) - return err; - ent->flags |= NILFS_SLH_FREED; - } - return 0; -} - -static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, struct inode *inode, struct list_head *listp, @@ -1066,8 +958,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, dispose_buffers: while (!list_empty(listp)) { - bh = list_entry(listp->next, struct buffer_head, - b_assoc_buffers); + bh = list_first_entry(listp, struct buffer_head, + b_assoc_buffers); list_del_init(&bh->b_assoc_buffers); brelse(bh); } @@ -1157,10 +1049,10 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; + size_t ndone; int err = 0; switch (sci->sc_stage.scnt) { @@ -1233,7 +1125,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ case NILFS_ST_IFILE: - err = nilfs_segctor_scan_file(sci, sbi->s_ifile, + err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, &nilfs_sc_file_ops); if (unlikely(err)) break; @@ -1250,10 +1142,16 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) break; sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_SUFILE: - err = nilfs_segctor_prepare_free_segments(sci, - nilfs->ns_sufile); - if (unlikely(err)) + err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, + sci->sc_nfreesegs, &ndone); + if (unlikely(err)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, ndone, + NULL); break; + } + sci->sc_stage.flags |= NILFS_CF_SUFREED; + err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, &nilfs_sc_file_ops); if (unlikely(err)) @@ -1261,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_DAT: dat_stage: - err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), + err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, &nilfs_sc_dat_ops); if (unlikely(err)) break; @@ -1304,73 +1202,75 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } -static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) -{ - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; - int err; - - err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); - if (unlikely(err)) - return err; - nilfs_mdt_mark_buffer_dirty(bh_su); - nilfs_mdt_mark_dirty(sufile); - nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); - return 0; -} - +/** + * nilfs_segctor_begin_construction - setup segment buffer to make a new log + * @sci: nilfs_sc_info + * @nilfs: nilfs object + */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf, *n; + struct nilfs_segment_buffer *segbuf, *prev; __u64 nextnum; - int err; + int err, alloc = 0; - if (list_empty(&sci->sc_segbufs)) { - segbuf = nilfs_segbuf_new(sci->sc_super); - if (unlikely(!segbuf)) - return -ENOMEM; - list_add(&segbuf->sb_list, &sci->sc_segbufs); - } else - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + return -ENOMEM; - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, - nilfs); + if (list_empty(&sci->sc_write_logs)) { + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, + nilfs->ns_pseg_offset, nilfs); + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_shift_to_next_segment(nilfs); + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + } + + segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + nextnum = nilfs->ns_nextnum; - if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { - nilfs_shift_to_next_segment(nilfs); - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + if (nilfs->ns_segnum == nilfs->ns_nextnum) + /* Start from the head of a new full segment */ + alloc++; + } else { + /* Continue logs */ + prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs); + nilfs_segbuf_map_cont(segbuf, prev); + segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq; + nextnum = prev->sb_nextnum; + + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); + segbuf->sb_sum.seg_seq++; + alloc++; + } } - sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; - err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); - if (unlikely(err)) - return err; + err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); + if (err) + goto failed; - if (nilfs->ns_segnum == nilfs->ns_nextnum) { - /* Start from the head of a new full segment */ + if (alloc) { err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); - if (unlikely(err)) - return err; - } else - nextnum = nilfs->ns_nextnum; - - segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + if (err) + goto failed; + } nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); - /* truncating segment buffers */ - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } + BUG_ON(!list_empty(&sci->sc_segbufs)); + list_add_tail(&segbuf->sb_list, &sci->sc_segbufs); + sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; return 0; + + failed: + nilfs_segbuf_free(segbuf); + return err; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int nadd) { - struct nilfs_segment_buffer *segbuf, *prev, *n; + struct nilfs_segment_buffer *segbuf, *prev; struct inode *sufile = nilfs->ns_sufile; __u64 nextnextnum; LIST_HEAD(list); @@ -1383,7 +1283,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, * not be dirty. The following call ensures that the buffer is dirty * and will pin the buffer on memory until the sufile is written. */ - err = nilfs_touch_segusage(sufile, prev->sb_nextnum); + err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum); if (unlikely(err)) return err; @@ -1409,33 +1309,33 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, list_add_tail(&segbuf->sb_list, &list); prev = segbuf; } - list_splice(&list, sci->sc_segbufs.prev); + list_splice_tail(&list, &sci->sc_segbufs); return 0; failed_segbuf: nilfs_segbuf_free(segbuf); failed: - list_for_each_entry_safe(segbuf, n, &list, sb_list) { + list_for_each_entry(segbuf, &list, sb_list) { ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); } + nilfs_destroy_logs(&list); return err; } -static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) +static void nilfs_free_incomplete_logs(struct list_head *logs, + struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf; - int ret, done = 0; + struct nilfs_segment_buffer *segbuf, *prev; + struct inode *sufile = nilfs->ns_sufile; + int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_FIRST_SEGBUF(logs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ } - if (segbuf->sb_io_error) { + if (atomic_read(&segbuf->sb_err)) { /* Case 1: The first segment failed */ if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) /* Case 1a: Partial segment appended into an existing @@ -1444,98 +1344,54 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, segbuf->sb_fseg_end); else /* Case 1b: New full segment */ set_nilfs_discontinued(nilfs); - done++; } - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - WARN_ON(ret); /* never fails */ - if (!done && segbuf->sb_io_error) { - if (segbuf->sb_segnum != nilfs->ns_nextnum) - /* Case 2: extended segment (!= next) failed */ - nilfs_sufile_set_error(nilfs->ns_sufile, - segbuf->sb_segnum); - done++; + prev = segbuf; + list_for_each_entry_continue(segbuf, logs, sb_list) { + if (prev->sb_nextnum != segbuf->sb_nextnum) { + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + WARN_ON(ret); /* never fails */ } + if (atomic_read(&segbuf->sb_err) && + segbuf->sb_segnum != nilfs->ns_nextnum) + /* Case 2: extended segment (!= next) failed */ + nilfs_sufile_set_error(sufile, segbuf->sb_segnum); + prev = segbuf; } } -static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) - nilfs_segbuf_clear(segbuf); - sci->sc_super_root = NULL; -} - -static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - while (!list_empty(&sci->sc_segbufs)) { - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } - /* sci->sc_curseg = NULL; */ -} - -static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs, int err) -{ - if (unlikely(err)) { - nilfs_segctor_free_incomplete_segments(sci, nilfs); - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); - } - nilfs_segctor_clear_segment_buffers(sci); -} - static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; unsigned long live_blocks; int ret; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ live_blocks = segbuf->sb_sum.nblocks + (segbuf->sb_pseg_start - segbuf->sb_fseg_start); - raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); - raw_su->su_nblocks = cpu_to_le32(live_blocks); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + live_blocks, + sci->sc_seg_ctime); + WARN_ON(ret); /* always succeed because the segusage is dirty */ } } -static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, - struct inode *sufile) +static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ - raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - - segbuf->sb_fseg_start); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); + segbuf = NILFS_FIRST_SEGBUF(logs); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + segbuf->sb_pseg_start - + segbuf->sb_fseg_start, 0); + WARN_ON(ret); /* always succeed because the segusage is dirty */ - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); + list_for_each_entry_continue(segbuf, logs, sb_list) { + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + 0, 0); WARN_ON(ret); /* always succeed */ - raw_su->su_nblocks = 0; - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); } } @@ -1543,17 +1399,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, struct nilfs_segment_buffer *last, struct inode *sufile) { - struct nilfs_segment_buffer *segbuf = last, *n; + struct nilfs_segment_buffer *segbuf = last; int ret; - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); - nilfs_segbuf_free(segbuf); } + nilfs_truncate_logs(&sci->sc_segbufs, last); } @@ -1565,7 +1419,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, /* Collection retry loop */ for (;;) { - sci->sc_super_root = NULL; sci->sc_nblk_this_inc = 0; sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); @@ -1585,8 +1438,16 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) break; - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); - nilfs_segctor_clear_segment_buffers(sci); + nilfs_clear_logs(&sci->sc_segbufs); + + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; + } err = nilfs_segctor_extend_segments(sci, nilfs, nadd); if (unlikely(err)) @@ -1636,7 +1497,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, ssp.offset = sizeof(struct nilfs_segment_summary); list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == sci->sc_super_root) + if (bh == segbuf->sb_super_root) break; if (!finfo) { finfo = nilfs_segctor_map_segsum_entry( @@ -1645,10 +1506,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, nblocks = le32_to_cpu(finfo->fi_nblocks); ndatablk = le32_to_cpu(finfo->fi_ndatablk); - if (buffer_nilfs_node(bh)) - inode = NILFS_BTNC_I(bh->b_page->mapping); - else - inode = NILFS_AS_I(bh->b_page->mapping); + inode = bh->b_page->mapping->host; if (mode == SC_LSEG_DSYNC) sc_op = &nilfs_sc_dsync_ops; @@ -1684,7 +1542,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, return 0; failed_bmap: - err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); return err; } @@ -1702,88 +1559,30 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static int -nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) -{ - struct page *clone_page; - struct buffer_head *bh, *head, *bh2; - void *kaddr; - - bh = head = page_buffers(page); - - clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); - if (unlikely(!clone_page)) - return -ENOMEM; - - bh2 = page_buffers(clone_page); - kaddr = kmap_atomic(page, KM_USER0); - do { - if (list_empty(&bh->b_assoc_buffers)) - continue; - get_bh(bh2); - page_cache_get(clone_page); /* for each bh */ - memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); - bh2->b_blocknr = bh->b_blocknr; - list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); - list_add_tail(&bh->b_assoc_buffers, out); - } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); - kunmap_atomic(kaddr, KM_USER0); - - if (!TestSetPageWriteback(clone_page)) - inc_zone_page_state(clone_page, NR_WRITEBACK); - unlock_page(clone_page); - - return 0; -} - -static int nilfs_test_page_to_be_frozen(struct page *page) -{ - struct address_space *mapping = page->mapping; - - if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) - return 0; - - if (page_mapped(page)) { - ClearPageChecked(page); - return 1; - } - return PageChecked(page); -} - -static int nilfs_begin_page_io(struct page *page, struct list_head *out) +static void nilfs_begin_page_io(struct page *page) { if (!page || PageWriteback(page)) /* For split b-tree node pages, this function may be called twice. We ignore the 2nd or later calls by this check. */ - return 0; + return; lock_page(page); clear_page_dirty_for_io(page); set_page_writeback(page); unlock_page(page); - - if (nilfs_test_page_to_be_frozen(page)) { - int err = nilfs_copy_replace_page_buffers(page, out); - if (unlikely(err)) - return err; - } - return 0; } -static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, - struct page **failed_page) +static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; - struct list_head *list = &sci->sc_copied_buffers; - int err; - *failed_page = NULL; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { struct buffer_head *bh; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); @@ -1797,7 +1596,8 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == sci->sc_super_root) { + set_buffer_async_write(bh); + if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); clear_page_dirty_for_io(bd_page); @@ -1808,11 +1608,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, break; } if (bh->b_page != fs_page) { - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) { - *failed_page = fs_page; - goto out; - } + nilfs_begin_page_io(fs_page); fs_page = bh->b_page; } } @@ -1823,51 +1619,45 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, set_page_writeback(bd_page); unlock_page(bd_page); } - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) - *failed_page = fs_page; - out: - return err; + nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, - struct backing_dev_info *bdi) + struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf; - struct nilfs_write_info wi; - int err, res; - - wi.sb = sci->sc_super; - wi.bh_sr = sci->sc_super_root; - wi.bdi = bdi; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - nilfs_segbuf_prepare_write(segbuf, &wi); - err = nilfs_segbuf_write(segbuf, &wi); + int ret; - res = nilfs_segbuf_wait(segbuf, &wi); - err = unlikely(err) ? : res; - if (unlikely(err)) - return err; - } - return 0; + ret = nilfs_write_logs(&sci->sc_segbufs, nilfs); + list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs); + return ret; } -static int nilfs_page_has_uncleared_buffer(struct page *page) +static void nilfs_end_page_io(struct page *page, int err) { - struct buffer_head *head, *bh; + if (!page) + return; - head = bh = page_buffers(page); - do { - if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) - return 1; - bh = bh->b_this_page; - } while (bh != head); - return 0; -} + if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { + /* + * For b-tree node pages, this function may be called twice + * or more because they might be split in a segment. + */ + if (PageDirty(page)) { + /* + * For pages holding split b-tree node buffers, dirty + * flag on the buffers may be cleared discretely. + * In that case, the page is once redirtied for + * remaining buffers, and it must be cancelled if + * all the buffers get cleaned later. + */ + lock_page(page); + if (nilfs_page_buffers_clean(page)) + __nilfs_clear_page_dirty(page); + unlock_page(page); + } + return; + } -static void __nilfs_end_page_io(struct page *page, int err) -{ if (!err) { if (!nilfs_page_buffers_clean(page)) __set_page_dirty_nobuffers(page); @@ -1877,68 +1667,22 @@ static void __nilfs_end_page_io(struct page *page, int err) SetPageError(page); } - if (buffer_nilfs_allocated(page_buffers(page))) { - if (TestClearPageWriteback(page)) - dec_zone_page_state(page, NR_WRITEBACK); - } else - end_page_writeback(page); + end_page_writeback(page); } -static void nilfs_end_page_io(struct page *page, int err) -{ - if (!page) - return; - - if (buffer_nilfs_node(page_buffers(page)) && - nilfs_page_has_uncleared_buffer(page)) - /* For b-tree node pages, this function may be called twice - or more because they might be split in a segment. - This check assures that cleanup has been done for all - buffers in a split btnode page. */ - return; - - __nilfs_end_page_io(page, err); -} - -static void nilfs_clear_copied_buffers(struct list_head *list, int err) -{ - struct buffer_head *bh, *head; - struct page *page; - - while (!list_empty(list)) { - bh = list_entry(list->next, struct buffer_head, - b_assoc_buffers); - page = bh->b_page; - page_cache_get(page); - head = bh = page_buffers(page); - do { - if (!list_empty(&bh->b_assoc_buffers)) { - list_del_init(&bh->b_assoc_buffers); - if (!err) { - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - } - brelse(bh); /* for b_assoc_buffers */ - } - } while ((bh = bh->b_this_page) != head); - - __nilfs_end_page_io(page, err); - page_cache_release(page); - } -} - -static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, - struct page *failed_page, int err) +static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + if (list_empty(logs)) + return; + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1948,7 +1692,8 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == sci->sc_super_root) { + clear_buffer_async_write(bh); + if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1957,8 +1702,6 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, } if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); - if (unlikely(fs_page == failed_page)) - goto done; fs_page = bh->b_page; } } @@ -1967,8 +1710,31 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, end_page_writeback(bd_page); nilfs_end_page_io(fs_page, err); - done: - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); +} + +static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int err) +{ + LIST_HEAD(logs); + int ret; + + list_splice_tail_init(&sci->sc_write_logs, &logs); + ret = nilfs_wait_on_logs(&logs); + nilfs_abort_logs(&logs, ret ? : err); + + list_splice_tail_init(&sci->sc_segbufs, &logs); + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); + nilfs_free_incomplete_logs(&logs, nilfs); + + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(ret); /* do not happen */ + } + + nilfs_destroy_logs(&logs); } static void nilfs_set_next_segment(struct the_nilfs *nilfs, @@ -1986,17 +1752,17 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; - int update_sr = (sci->sc_super_root != NULL); + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + int update_sr = false; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { struct buffer_head *bh; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -2018,12 +1784,16 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); + clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); - if (bh == sci->sc_super_root) { + clear_buffer_nilfs_redirected(bh); + if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; } + update_sr = true; break; } if (bh->b_page != fs_page) { @@ -2032,12 +1802,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) } } - if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { - if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { + if (!nilfs_segbuf_simplex(segbuf)) { + if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) { set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); sci->sc_lseg_stime = jiffies; } - if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) + if (segbuf->sb_sum.flags & NILFS_SS_LOGEND) clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); } } @@ -2050,103 +1820,100 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_end_page_io(fs_page, 0); - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); - nilfs_drop_collected_inodes(&sci->sc_dirty_files); - if (nilfs_doing_gc()) { + if (nilfs_doing_gc()) nilfs_drop_collected_inodes(&sci->sc_gc_inodes); - if (update_sr) - nilfs_commit_gcdat_inode(nilfs); - } else + else nilfs->ns_nongc_ctime = sci->sc_seg_ctime; sci->sc_nblk_inc += sci->sc_nblk_this_inc; - segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); nilfs_set_next_segment(nilfs, segbuf); if (update_sr) { nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno++); - sbi->s_super->s_dirt = 1; clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); + nilfs_segctor_clear_metadata_dirty(sci); } else clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); } -static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, - struct nilfs_sb_info *sbi) +static int nilfs_segctor_wait(struct nilfs_sc_info *sci) +{ + int ret; + + ret = nilfs_wait_on_logs(&sci->sc_write_logs); + if (!ret) { + nilfs_segctor_complete_write(sci); + nilfs_destroy_logs(&sci->sc_write_logs); + } + return ret; +} + +static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; + struct inode *ifile = sci->sc_root->ifile; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); retry: - list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { + list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) { if (!ii->i_bh) { struct buffer_head *ibh; int err; - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block( - sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); + ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(sci->sc_super, __func__, "failed to get inode block.\n"); return err; } - nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); - spin_lock(&sbi->s_inode_lock); + mark_buffer_dirty(ibh); + nilfs_mdt_mark_dirty(ifile); + spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; else brelse(ibh); goto retry; } - ii->i_cno = cno; clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); + list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); } - spin_unlock(&sbi->s_inode_lock); - - NILFS_I(sbi->s_ifile)->i_cno = cno; + spin_unlock(&nilfs->ns_inode_lock); return 0; } -static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, - struct nilfs_sb_info *sbi) +static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || - test_bit(NILFS_I_DIRTY, &ii->i_state)) { - /* The current checkpoint number (=nilfs->ns_cno) is - changed between check-in and check-out only if the - super root is written out. So, we can update i_cno - for the inodes that remain in the dirty list. */ - ii->i_cno = cno; + test_bit(NILFS_I_DIRTY, &ii->i_state)) continue; - } + clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &ti->ti_garbage); + list_move_tail(&ii->i_dirty, &ti->ti_garbage); } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); } /* @@ -2154,18 +1921,17 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, */ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; - struct page *failed_page; - int err, has_sr = 0; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + int err; sci->sc_stage.scnt = NILFS_ST_INIT; + sci->sc_cno = nilfs->ns_cno; - err = nilfs_segctor_check_in_files(sci, sbi); + err = nilfs_segctor_collect_dirty_files(sci, nilfs); if (unlikely(err)) goto out; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); if (nilfs_segctor_clean(sci)) @@ -2185,12 +1951,10 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) goto failed; - has_sr = (sci->sc_super_root != NULL); - /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && - NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { - nilfs_segctor_end_construction(sci, nilfs, 1); + nilfs_segbuf_empty(sci->sc_curseg)) { + nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; } @@ -2199,62 +1963,60 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed; if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); + nilfs_segctor_fill_in_file_bmap(sci); - if (has_sr) { + if (mode == SC_LSEG_SR && + sci->sc_stage.scnt >= NILFS_ST_CPFILE) { err = nilfs_segctor_fill_in_checkpoint(sci); if (unlikely(err)) - goto failed_to_make_up; + goto failed_to_write; nilfs_segctor_fill_in_super_root(sci, nilfs); } nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - err = nilfs_segctor_prepare_write(sci, &failed_page); - if (unlikely(err)) - goto failed_to_write; + nilfs_segctor_prepare_write(sci); - nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); + nilfs_add_checksums_on_logs(&sci->sc_segbufs, + nilfs->ns_crc_seed); - err = nilfs_segctor_write(sci, nilfs->ns_bdi); + err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) goto failed_to_write; - nilfs_segctor_complete_write(sci); - - /* Commit segments */ - if (has_sr) { - nilfs_segctor_commit_free_segments(sci); - nilfs_segctor_clear_metadata_dirty(sci); + if (sci->sc_stage.scnt == NILFS_ST_DONE || + nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { + /* + * At this point, we avoid double buffering + * for blocksize < pagesize because page dirty + * flag is turned off during write and dirty + * buffers are not properly collected for + * pages crossing over segments. + */ + err = nilfs_segctor_wait(sci); + if (err) + goto failed_to_write; } - - nilfs_segctor_end_construction(sci, nilfs, 0); - } while (sci->sc_stage.scnt != NILFS_ST_DONE); out: - nilfs_segctor_destroy_segment_buffers(sci); - nilfs_segctor_check_out_files(sci, sbi); + nilfs_segctor_drop_written_files(sci, nilfs); return err; failed_to_write: - nilfs_segctor_abort_write(sci, failed_page, err); - nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); - - failed_to_make_up: if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_redirty_inodes(&sci->sc_dirty_files); failed: if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); - nilfs_segctor_end_construction(sci, nilfs, err); + nilfs_segctor_abort_construction(sci, nilfs, err); goto out; } /** - * nilfs_secgtor_start_timer - set timer of background write + * nilfs_segctor_start_timer - set timer of background write * @sci: nilfs_sc_info * * If the timer has already been set, it ignores the new request. @@ -2264,9 +2026,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); - if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer->expires = jiffies + sci->sc_interval; - add_timer(sci->sc_timer); + if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2292,8 +2054,8 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) */ void nilfs_flush_segment(struct super_block *sb, ino_t ino) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; if (!sci || nilfs_doing_construction()) return; @@ -2301,48 +2063,6 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino) /* assign bit 0 to data files */ } -int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, - __u64 *segnum, size_t nsegs) -{ - struct nilfs_segment_entry *ent; - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; - struct inode *sufile = nilfs->ns_sufile; - LIST_HEAD(list); - __u64 *pnum; - size_t i; - int err; - - for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { - ent = nilfs_alloc_segment_entry(*pnum); - if (unlikely(!ent)) { - err = -ENOMEM; - goto failed; - } - list_add_tail(&ent->list, &list); - - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - - if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su))) - printk(KERN_WARNING "NILFS: unused segment is " - "requested to be cleaned (segnum=%llu)\n", - (unsigned long long)ent->segnum); - nilfs_close_segment_entry(ent, sufile); - } - list_splice(&list, sci->sc_cleaning_segments.prev); - return 0; - - failed: - nilfs_dispose_segment_list(&list); - return err; -} - -void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - struct nilfs_segctor_wait_request { wait_queue_t wq; __u32 seq; @@ -2424,8 +2144,8 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) */ int nilfs_construct_segment(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info *ti; int err; @@ -2462,8 +2182,8 @@ int nilfs_construct_segment(struct super_block *sb) int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, loff_t start, loff_t end) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_inode_info *ii; struct nilfs_transaction_info ti; int err = 0; @@ -2471,101 +2191,118 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, if (!sci) return -EROFS; - nilfs_transaction_lock(sbi, &ti, 0); + nilfs_transaction_lock(sb, &ti, 0); ii = NILFS_I(inode); if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || - nilfs_test_opt(sbi, STRICT_ORDER) || + nilfs_test_opt(nilfs, STRICT_ORDER) || test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || - nilfs_discontinued(sbi->s_nilfs)) { - nilfs_transaction_unlock(sbi); + nilfs_discontinued(nilfs)) { + nilfs_transaction_unlock(sb); err = nilfs_segctor_sync(sci); return err; } - spin_lock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { - spin_unlock(&sbi->s_inode_lock); - nilfs_transaction_unlock(sbi); + spin_unlock(&nilfs->ns_inode_lock); + nilfs_transaction_unlock(sb); return 0; } - spin_unlock(&sbi->s_inode_lock); + spin_unlock(&nilfs->ns_inode_lock); sci->sc_dsync_inode = ii; sci->sc_dsync_start = start; sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sb); return err; } -struct nilfs_segctor_req { - int mode; - __u32 seq_accepted; - int sc_err; /* construction failure */ - int sb_err; /* super block writeback failure */ -}; - #define FLUSH_FILE_BIT (0x1) /* data file only */ #define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ -static void nilfs_segctor_accept(struct nilfs_sc_info *sci, - struct nilfs_segctor_req *req) +/** + * nilfs_segctor_accept - record accepted sequence count of log-write requests + * @sci: segment constructor object + */ +static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { - req->sc_err = req->sb_err = 0; spin_lock(&sci->sc_state_lock); - req->seq_accepted = sci->sc_seq_request; + sci->sc_seq_accepted = sci->sc_seq_request; spin_unlock(&sci->sc_state_lock); - - if (sci->sc_timer) - del_timer_sync(sci->sc_timer); + del_timer_sync(&sci->sc_timer); } -static void nilfs_segctor_notify(struct nilfs_sc_info *sci, - struct nilfs_segctor_req *req) +/** + * nilfs_segctor_notify - notify the result of request to caller threads + * @sci: segment constructor object + * @mode: mode of log forming + * @err: error code to be notified + */ +static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) { /* Clear requests (even when the construction failed) */ spin_lock(&sci->sc_state_lock); - sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; - - if (req->mode == SC_LSEG_SR) { - sci->sc_seq_done = req->seq_accepted; - nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err); + if (mode == SC_LSEG_SR) { + sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; + sci->sc_seq_done = sci->sc_seq_accepted; + nilfs_segctor_wakeup(sci, err); sci->sc_flush_request = 0; - } else if (req->mode == SC_FLUSH_FILE) - sci->sc_flush_request &= ~FLUSH_FILE_BIT; - else if (req->mode == SC_FLUSH_DAT) - sci->sc_flush_request &= ~FLUSH_DAT_BIT; + } else { + if (mode == SC_FLUSH_FILE) + sci->sc_flush_request &= ~FLUSH_FILE_BIT; + else if (mode == SC_FLUSH_DAT) + sci->sc_flush_request &= ~FLUSH_DAT_BIT; + /* re-enable timer if checkpoint creation was not done */ + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_before(jiffies, sci->sc_timer.expires)) + add_timer(&sci->sc_timer); + } spin_unlock(&sci->sc_state_lock); } -static int nilfs_segctor_construct(struct nilfs_sc_info *sci, - struct nilfs_segctor_req *req) +/** + * nilfs_segctor_construct - form logs and write them to disk + * @sci: segment constructor object + * @mode: mode of log forming + */ +static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + struct nilfs_super_block **sbp; int err = 0; + nilfs_segctor_accept(sci); + if (nilfs_discontinued(nilfs)) - req->mode = SC_LSEG_SR; - if (!nilfs_segctor_confirm(sci)) { - err = nilfs_segctor_do_construct(sci, req->mode); - req->sc_err = err; - } + mode = SC_LSEG_SR; + if (!nilfs_segctor_confirm(sci)) + err = nilfs_segctor_do_construct(sci, mode); + if (likely(!err)) { - if (req->mode != SC_FLUSH_DAT) + if (mode != SC_FLUSH_DAT) atomic_set(&nilfs->ns_ndirtyblks, 0); if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && nilfs_discontinued(nilfs)) { down_write(&nilfs->ns_sem); - req->sb_err = nilfs_commit_super(sbi, 0); + err = -EIO; + sbp = nilfs_prepare_super(sci->sc_super, + nilfs_sb_will_flip(nilfs)); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + err = nilfs_commit_super(sci->sc_super, + NILFS_SB_COMMIT); + } up_write(&nilfs->ns_sem); } } + + nilfs_segctor_notify(sci, mode, err); return err; } @@ -2583,41 +2320,43 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) list_for_each_entry_safe(ii, n, head, i_dirty) { if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; - hlist_del_init(&ii->vfs_inode.i_hash); list_del_init(&ii->i_dirty); - nilfs_clear_gcinode(&ii->vfs_inode); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); + iput(&ii->vfs_inode); } } int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, void **kbufs) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_sc_info *sci = NILFS_SC(sbi); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info ti; - struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; int err; if (unlikely(!sci)) return -EROFS; - nilfs_transaction_lock(sbi, &ti, 1); + nilfs_transaction_lock(sb, &ti, 1); - err = nilfs_init_gcdat_inode(nilfs); + err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); if (unlikely(err)) goto out_unlock; + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); - if (unlikely(err)) + if (unlikely(err)) { + nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); goto out_unlock; + } - list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); + sci->sc_freesegs = kbufs[4]; + sci->sc_nfreesegs = argv[4].v_nmembs; + list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); for (;;) { - nilfs_segctor_accept(sci, &req); - err = nilfs_segctor_construct(sci, &req); + err = nilfs_segctor_construct(sci, SC_LSEG_SR); nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); - nilfs_segctor_notify(sci, &req); if (likely(!err)) break; @@ -2627,24 +2366,31 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } + if (nilfs_test_opt(nilfs, DISCARD)) { + int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (ret) { + printk(KERN_WARNING + "NILFS warning: error %d on discard request, " + "turning discards off for the device\n", ret); + nilfs_clear_opt(nilfs, DISCARD); + } + } out_unlock: - nilfs_clear_gcdat_inode(nilfs); - nilfs_transaction_unlock(sbi); + sci->sc_freesegs = NULL; + sci->sc_nfreesegs = 0; + nilfs_mdt_clear_shadow_map(nilfs->ns_dat); + nilfs_transaction_unlock(sb); return err; } static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) { - struct nilfs_sb_info *sbi = sci->sc_sbi; struct nilfs_transaction_info ti; - struct nilfs_segctor_req req = { .mode = mode }; - - nilfs_transaction_lock(sbi, &ti, 0); - nilfs_segctor_accept(sci, &req); - nilfs_segctor_construct(sci, &req); - nilfs_segctor_notify(sci, &req); + nilfs_transaction_lock(sci->sc_super, &ti, 0); + nilfs_segctor_construct(sci, mode); /* * Unclosed segment should be retried. We do this using sc_timer. @@ -2654,7 +2400,7 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) nilfs_segctor_start_timer(sci); - nilfs_transaction_unlock(sbi); + nilfs_transaction_unlock(sci->sc_super); } static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) @@ -2700,13 +2446,11 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; - struct timer_list timer; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - init_timer(&timer); - timer.data = (unsigned long)current; - timer.function = nilfs_construction_timeout; - sci->sc_timer = &timer; + sci->sc_timer.data = (unsigned long)current; + sci->sc_timer.function = nilfs_construction_timeout; /* start sync. */ sci->sc_task = current; @@ -2740,7 +2484,7 @@ static int nilfs_segctor_thread(void *arg) if (freezing(current)) { spin_unlock(&sci->sc_state_lock); - refrigerator(); + try_to_freeze(); spin_lock(&sci->sc_state_lock); } else { DEFINE_WAIT(wait); @@ -2755,7 +2499,7 @@ static int nilfs_segctor_thread(void *arg) should_sleep = 0; else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) should_sleep = time_before(jiffies, - sci->sc_timer->expires); + sci->sc_timer.expires); if (should_sleep) { spin_unlock(&sci->sc_state_lock); @@ -2764,14 +2508,15 @@ static int nilfs_segctor_thread(void *arg) } finish_wait(&sci->sc_wait_daemon, &wait); timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer->expires)); + time_after_eq(jiffies, sci->sc_timer.expires)); + + if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); } goto loop; end_thread: spin_unlock(&sci->sc_state_lock); - del_timer_sync(sci->sc_timer); - sci->sc_timer = NULL; /* end sync. */ sci->sc_task = NULL; @@ -2796,6 +2541,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) } static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) + __acquires(&sci->sc_state_lock) + __releases(&sci->sc_state_lock) { sci->sc_state |= NILFS_SEGCTOR_QUIT; @@ -2807,26 +2554,23 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) } } -static int nilfs_segctor_init(struct nilfs_sc_info *sci) -{ - sci->sc_seq_done = sci->sc_seq_request; - - return nilfs_segctor_start_thread(sci); -} - /* * Setup & clean-up functions */ -static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) +static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, + struct nilfs_root *root) { + struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci; sci = kzalloc(sizeof(*sci), GFP_KERNEL); if (!sci) return NULL; - sci->sc_sbi = sbi; - sci->sc_super = sbi->s_super; + sci->sc_super = sb; + + nilfs_get_root(root); + sci->sc_root = root; init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); @@ -2834,18 +2578,18 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); + INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_cleaning_segments); - INIT_LIST_HEAD(&sci->sc_copied_buffers); + init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; - if (sbi->s_interval) - sci->sc_interval = sbi->s_interval; - if (sbi->s_watermark) - sci->sc_watermark = sbi->s_watermark; + if (nilfs->ns_interval) + sci->sc_interval = HZ * nilfs->ns_interval; + if (nilfs->ns_watermark) + sci->sc_watermark = nilfs->ns_watermark; return sci; } @@ -2856,15 +2600,11 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) /* The segctord thread was stopped and its timer was removed. But some tasks remain. */ do { - struct nilfs_sb_info *sbi = sci->sc_sbi; struct nilfs_transaction_info ti; - struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; - nilfs_transaction_lock(sbi, &ti, 0); - nilfs_segctor_accept(sci, &req); - ret = nilfs_segctor_construct(sci, &req); - nilfs_segctor_notify(sci, &req); - nilfs_transaction_unlock(sbi); + nilfs_transaction_lock(sci->sc_super, &ti, 0); + ret = nilfs_segctor_construct(sci, SC_LSEG_SR); + nilfs_transaction_unlock(sci->sc_super); } while (ret && retrycount-- > 0); } @@ -2879,10 +2619,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) */ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) { - struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int flag; - up_write(&sbi->s_nilfs->ns_segctor_sem); + up_write(&nilfs->ns_segctor_sem); spin_lock(&sci->sc_state_lock); nilfs_segctor_kill_thread(sci); @@ -2890,89 +2630,92 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); - if (flag || nilfs_segctor_confirm(sci)) + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); - WARN_ON(!list_empty(&sci->sc_copied_buffers)); - if (!list_empty(&sci->sc_dirty_files)) { - nilfs_warning(sbi->s_super, __func__, + nilfs_warning(sci->sc_super, __func__, "dirty file(s) after the final construction\n"); - nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); + nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } - if (!list_empty(&sci->sc_cleaning_segments)) - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); - WARN_ON(!list_empty(&sci->sc_segbufs)); + WARN_ON(!list_empty(&sci->sc_write_logs)); - down_write(&sbi->s_nilfs->ns_segctor_sem); + nilfs_put_root(sci->sc_root); + down_write(&nilfs->ns_segctor_sem); + + del_timer_sync(&sci->sc_timer); kfree(sci); } /** - * nilfs_attach_segment_constructor - attach a segment constructor - * @sbi: nilfs_sb_info + * nilfs_attach_log_writer - attach log writer + * @sb: super block instance + * @root: root object of the current filesystem tree * - * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, - * initilizes it, and starts the segment constructor. + * This allocates a log writer object, initializes it, and starts the + * log writer. * * Return Value: On success, 0 is returned. On error, one of the following * negative error code is returned. * * %-ENOMEM - Insufficient memory available. */ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) +int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; int err; - /* Each field of nilfs_segctor is cleared through the initialization - of super-block info */ - sbi->s_sc_info = nilfs_segctor_new(sbi); - if (!sbi->s_sc_info) + if (nilfs->ns_writer) { + /* + * This happens if the filesystem was remounted + * read/write after nilfs_error degenerated it into a + * read-only mount. + */ + nilfs_detach_log_writer(sb); + } + + nilfs->ns_writer = nilfs_segctor_new(sb, root); + if (!nilfs->ns_writer) return -ENOMEM; - nilfs_attach_writer(nilfs, sbi); - err = nilfs_segctor_init(NILFS_SC(sbi)); + err = nilfs_segctor_start_thread(nilfs->ns_writer); if (err) { - nilfs_detach_writer(nilfs, sbi); - kfree(sbi->s_sc_info); - sbi->s_sc_info = NULL; + kfree(nilfs->ns_writer); + nilfs->ns_writer = NULL; } return err; } /** - * nilfs_detach_segment_constructor - destroy the segment constructor - * @sbi: nilfs_sb_info + * nilfs_detach_log_writer - destroy log writer + * @sb: super block instance * - * nilfs_detach_segment_constructor() kills the segment constructor daemon, - * frees the struct nilfs_sc_info, and destroy the dirty file list. + * This kills log writer daemon, frees the log writer object, and + * destroys list of dirty files. */ -void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) +void nilfs_detach_log_writer(struct super_block *sb) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; LIST_HEAD(garbage_list); down_write(&nilfs->ns_segctor_sem); - if (NILFS_SC(sbi)) { - nilfs_segctor_destroy(NILFS_SC(sbi)); - sbi->s_sc_info = NULL; + if (nilfs->ns_writer) { + nilfs_segctor_destroy(nilfs->ns_writer); + nilfs->ns_writer = NULL; } /* Force to free the list of dirty files */ - spin_lock(&sbi->s_inode_lock); - if (!list_empty(&sbi->s_dirty_files)) { - list_splice_init(&sbi->s_dirty_files, &garbage_list); - nilfs_warning(sbi->s_super, __func__, - "Non empty dirty list after the last " - "segment construction\n"); - } - spin_unlock(&sbi->s_inode_lock); + spin_lock(&nilfs->ns_inode_lock); + if (!list_empty(&nilfs->ns_dirty_files)) { + list_splice_init(&nilfs->ns_dirty_files, &garbage_list); + nilfs_warning(sb, __func__, + "Hit dirty file after stopped log writer\n"); + } + spin_unlock(&nilfs->ns_inode_lock); up_write(&nilfs->ns_segctor_sem); - nilfs_dispose_list(sbi, &garbage_list, 1); - nilfs_detach_writer(nilfs, sbi); + nilfs_dispose_list(nilfs, &garbage_list, 1); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 476bdd5df5b..38a1d001331 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -27,10 +27,12 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/nilfs2_fs.h> -#include "sb.h" +#include "nilfs.h" + +struct nilfs_root; /** - * struct nilfs_recovery_info - Recovery infomation + * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root * @ri_ri_cno: Number of the last checkpoint @@ -71,7 +73,7 @@ struct nilfs_recovery_info { */ struct nilfs_cstage { int scnt; - unsigned flags; + unsigned flags; struct nilfs_inode_info *dirty_file_ptr; struct nilfs_inode_info *gc_inode_ptr; }; @@ -86,19 +88,19 @@ struct nilfs_segsum_pointer { /** * struct nilfs_sc_info - Segment constructor information * @sc_super: Back pointer to super_block struct - * @sc_sbi: Back pointer to nilfs_sb_info struct + * @sc_root: root object of the current filesystem tree * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_cleaning_segments: List of segments to be freed through construction - * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_freesegs: array of segment numbers to be freed + * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation * @sc_dsync_start: start byte offset of data pages * @sc_dsync_end: end byte offset of data pages (inclusive) * @sc_segbufs: List of segment buffers + * @sc_write_logs: List of segment buffers to hold logs under writing * @sc_segbuf_nblocks: Number of available blocks in segment buffers. * @sc_curseg: Current segment buffer - * @sc_super_root: Pointer to the super root buffer * @sc_stage: Collection stage * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary @@ -106,6 +108,7 @@ struct nilfs_segsum_pointer { * @sc_datablk_cnt: Data block count of a file * @sc_nblk_this_inc: Number of blocks included in the current logical segment * @sc_seg_ctime: Creation time + * @sc_cno: checkpoint number of current log * @sc_flags: Internal flags * @sc_state_lock: spinlock for sc_state and so on * @sc_state: Segctord state flags @@ -114,6 +117,7 @@ struct nilfs_segsum_pointer { * @sc_wait_daemon: Daemon wait queue * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter + * @sc_seq_accept: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -125,14 +129,15 @@ struct nilfs_segsum_pointer { */ struct nilfs_sc_info { struct super_block *sc_super; - struct nilfs_sb_info *sc_sbi; + struct nilfs_root *sc_root; unsigned long sc_nblk_inc; struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_cleaning_segments; - struct list_head sc_copied_buffers; + + __u64 *sc_freesegs; + size_t sc_nfreesegs; struct nilfs_inode_info *sc_dsync_inode; loff_t sc_dsync_start; @@ -140,9 +145,9 @@ struct nilfs_sc_info { /* Segment buffers */ struct list_head sc_segbufs; + struct list_head sc_write_logs; unsigned long sc_segbuf_nblocks; struct nilfs_segment_buffer *sc_curseg; - struct buffer_head *sc_super_root; struct nilfs_cstage sc_stage; @@ -152,7 +157,7 @@ struct nilfs_sc_info { unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; time_t sc_seg_ctime; - + __u64 sc_cno; unsigned long sc_flags; spinlock_t sc_state_lock; @@ -164,6 +169,7 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_task; __u32 sc_seq_request; + __u32 sc_seq_accepted; __u32 sc_seq_done; int sc_sync; @@ -172,7 +178,7 @@ struct nilfs_sc_info { unsigned long sc_lseg_stime; /* in 1/HZ seconds */ unsigned long sc_watermark; - struct timer_list *sc_timer; + struct timer_list sc_timer; struct task_struct *sc_task; }; @@ -212,10 +218,10 @@ enum { */ #define NILFS_SC_DEFAULT_WATERMARK 3600 +/* super.c */ +extern struct kmem_cache *nilfs_transaction_cachep; /* segment.c */ -extern int nilfs_init_transaction_cache(void); -extern void nilfs_destroy_transaction_cache(void); extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); @@ -225,20 +231,16 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, - __u64 *, size_t); -extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); - -extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); -extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); +int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root); +void nilfs_detach_log_writer(struct super_block *sb); /* recovery.c */ -extern int nilfs_read_super_root_block(struct super_block *, sector_t, +extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, struct buffer_head **, int); -extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, +extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_recovery_info *); -extern int nilfs_recover_logical_segments(struct the_nilfs *, - struct nilfs_sb_info *, - struct nilfs_recovery_info *); +int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb, + struct nilfs_recovery_info *ri); +extern void nilfs_dispose_segment_list(struct list_head *); #endif /* _NILFS_SEGMENT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 98e68677f04..2a869c35c36 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Written by Koji Sato <koji@osrg.net>. + * Revised by Ryusuke Konishi <ryusuke@osrg.net>. */ #include <linux/kernel.h> @@ -29,6 +30,24 @@ #include "mdt.h" #include "sufile.h" +/** + * struct nilfs_sufile_info - on-memory private data of sufile + * @mi: on-memory private data of metadata file + * @ncleansegs: number of clean segments + * @allocmin: lower limit of allocatable segment range + * @allocmax: upper limit of allocatable segment range + */ +struct nilfs_sufile_info { + struct nilfs_mdt_info mi; + unsigned long ncleansegs;/* number of clean segments */ + __u64 allocmin; /* lower limit of allocatable segment range */ + __u64 allocmax; /* upper limit of allocatable segment range */ +}; + +static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) +{ + return (struct nilfs_sufile_info *)NILFS_MDT(sufile); +} static inline unsigned long nilfs_sufile_segment_usages_per_block(const struct inode *sufile) @@ -61,14 +80,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); } -static inline struct nilfs_sufile_header * -nilfs_sufile_block_get_header(const struct inode *sufile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - static struct nilfs_segment_usage * nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, struct buffer_head *bh, void *kaddr) @@ -93,19 +104,131 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile, + __u64 segnum) +{ + return nilfs_mdt_delete_block(sufile, + nilfs_sufile_get_blkoff(sufile, segnum)); +} + static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; void *kaddr; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); + + mark_buffer_dirty(header_bh); +} + +/** + * nilfs_sufile_get_ncleansegs - return the number of clean segments + * @sufile: inode of segment usage file + */ +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) +{ + return NILFS_SUI(sufile)->ncleansegs; +} + +/** + * nilfs_sufile_updatev - modify multiple segment usages at a time + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @create: creation flag + * @ndone: place to store number of modified segments on @segnumv + * @dofunc: primitive operation for the update + * + * Description: nilfs_sufile_updatev() repeatedly calls @dofunc + * against the given array of segments. The @dofunc is called with + * buffers of a header block and the sufile block in which the target + * segment usage entry is contained. If @ndone is given, the number + * of successfully modified segments from the head is stored in the + * place @ndone points to. + * + * Return Value: On success, zero is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * + * %-EINVAL - Invalid segment usage number + */ +int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, + int create, size_t *ndone, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + unsigned long blkoff, prev_blkoff; + __u64 *seg; + size_t nerr = 0, n = 0; + int ret = 0; + + if (unlikely(nsegs == 0)) + goto out; + + down_write(&NILFS_MDT(sufile)->mi_sem); + for (seg = segnumv; seg < segnumv + nsegs; seg++) { + if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING + "%s: invalid segment number: %llu\n", __func__, + (unsigned long long)*seg); + nerr++; + } + } + if (nerr > 0) { + ret = -EINVAL; + goto out_sem; + } + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + seg = segnumv; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + dofunc(sufile, *seg, header_bh, bh); + + if (++seg >= segnumv + nsegs) + break; + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + brelse(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (unlikely(ret < 0)) + goto out_header; + } + brelse(bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + out_header: + n = seg - segnumv; + brelse(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + out: + if (ndone) + *ndone = n; + return ret; } int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, @@ -140,6 +263,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, } /** + * nilfs_sufile_set_alloc_range - limit range of segment to be allocated + * @sufile: inode of segment usage file + * @start: minimum segment number of allocatable region (inclusive) + * @end: maximum segment number of allocatable region (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-ERANGE - invalid segment region + */ +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + __u64 nsegs; + int ret = -ERANGE; + + down_write(&NILFS_MDT(sufile)->mi_sem); + nsegs = nilfs_sufile_get_nsegments(sufile); + + if (start <= end && end < nsegs) { + sui->allocmin = start; + sui->allocmax = end; + ret = 0; + } + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file * @segnump: pointer to segment number @@ -161,37 +313,56 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus; - int ret, i, j; + unsigned long nsegments, ncleansegs, nsus, cnt; + int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nsegments = nilfs_sufile_get_nsegments(sufile); + maxsegnum = sui->allocmax; segnum = last_alloc + 1; - maxsegnum = nsegments - 1; - for (i = 0; i < nsegments; i += nsus) { - if (segnum >= nsegments) { - /* wrap around */ - segnum = 0; - maxsegnum = last_alloc; + if (segnum < sui->allocmin || segnum > sui->allocmax) + segnum = sui->allocmin; + + for (cnt = 0; cnt < nsegments; cnt += nsus) { + if (segnum > maxsegnum) { + if (cnt < sui->allocmax - sui->allocmin + 1) { + /* + * wrap around in the limited region. + * if allocation started from + * sui->allocmin, this never happens. + */ + segnum = sui->allocmin; + maxsegnum = last_alloc; + } else if (segnum > sui->allocmin && + sui->allocmax + 1 < nsegments) { + segnum = sui->allocmax + 1; + maxsegnum = nsegments - 1; + } else if (sui->allocmin > 0) { + segnum = 0; + maxsegnum = sui->allocmin - 1; + } else { + break; /* never happens */ + } } ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); @@ -202,25 +373,25 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header( - sufile, header_bh, kaddr); + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - nilfs_mdt_mark_buffer_dirty(header_bh); - nilfs_mdt_mark_buffer_dirty(su_bh); + sui->ncleansegs--; + mark_buffer_dirty(header_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; goto out_header; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } @@ -242,19 +413,21 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct nilfs_segment_usage *su; void *kaddr; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, -1, 1); - nilfs_mdt_mark_buffer_dirty(su_bh); + NILFS_SUI(sufile)->ncleansegs--; + + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -266,11 +439,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, void *kaddr; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } clean = nilfs_segment_usage_clean(su); @@ -280,10 +453,12 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); - nilfs_mdt_mark_buffer_dirty(su_bh); + NILFS_SUI(sufile)->ncleansegs -= clean; + + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -295,12 +470,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, void *kaddr; int sudirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { printk(KERN_WARNING "%s: segment %llu is already clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } WARN_ON(nilfs_segment_usage_error(su)); @@ -308,83 +483,69 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, sudirty = nilfs_segment_usage_dirty(su); nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh); + kunmap_atomic(kaddr); + mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); + NILFS_SUI(sufile)->ncleansegs++; + nilfs_mdt_mark_dirty(sufile); } /** - * nilfs_sufile_get_segment_usage - get a segment usage + * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number - * @sup: pointer to segment usage - * @bhp: pointer to buffer head - * - * Description: nilfs_sufile_get_segment_usage() acquires the segment usage - * specified by @segnum. - * - * Return Value: On success, 0 is returned, and the segment usage and the - * buffer head of the buffer on which the segment usage is located are stored - * in the place pointed by @sup and @bhp, respectively. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. */ -int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, - struct nilfs_segment_usage **sup, - struct buffer_head **bhp) +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; - struct nilfs_segment_usage *su; - void *kaddr; int ret; - /* segnum is 0 origin */ - if (segnum >= nilfs_sufile_get_nsegments(sufile)) - return -EINVAL; - down_write(&NILFS_MDT(sufile)->mi_sem); - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); - if (ret < 0) - goto out_sem; - kaddr = kmap(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - if (nilfs_segment_usage_error(su)) { - kunmap(bh->b_page); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (!ret) { + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); - ret = -EINVAL; - goto out_sem; } - - if (sup != NULL) - *sup = su; - *bhp = bh; - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** - * nilfs_sufile_put_segment_usage - put a segment usage + * nilfs_sufile_set_segment_usage - set usage of a segment * @sufile: inode of segment usage file * @segnum: segment number - * @bh: buffer head - * - * Description: nilfs_sufile_put_segment_usage() releases the segment usage - * specified by @segnum. @bh must be the buffer head which have been returned - * by a previous call to nilfs_sufile_get_segment_usage() with @segnum. + * @nblocks: number of live blocks in the segment + * @modtime: modification time (option) */ -void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, - struct buffer_head *bh) +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime) { - kunmap(bh->b_page); + struct buffer_head *bh; + struct nilfs_segment_usage *su; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (ret < 0) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + WARN_ON(nilfs_segment_usage_error(su)); + if (modtime) + su->su_lastmod = cpu_to_le64(modtime); + su->su_nblocks = cpu_to_le32(nblocks); + kunmap_atomic(kaddr); + + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; } /** @@ -407,7 +568,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; int ret; @@ -417,8 +578,8 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); @@ -427,7 +588,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(header_bh); out_sem: @@ -435,33 +596,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) return ret; } -/** - * nilfs_sufile_get_ncleansegs - get the number of clean segments - * @sufile: inode of segment usage file - * @nsegsp: pointer to the number of clean segments - * - * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean - * segments. - * - * Return Value: On success, 0 is returned and the number of clean segments is - * stored in the place pointed by @nsegsp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) -{ - struct nilfs_sustat sustat; - int ret; - - ret = nilfs_sufile_get_stat(sufile, &sustat); - if (ret == 0) - *nsegsp = sustat.ss_ncleansegs; - return ret; -} - void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) @@ -470,27 +604,202 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, void *kaddr; int suclean; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - if (suclean) + if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); - nilfs_mdt_mark_buffer_dirty(su_bh); + NILFS_SUI(sufile)->ncleansegs--; + } + mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); +} + +/** + * nilfs_sufile_truncate_range - truncate range of segment array + * @sufile: inode of segment usage file + * @start: start segment number (inclusive) + * @end: end segment number (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid number of segments specified + * + * %-EBUSY - Dirty or active segments are present in the range + */ +static int nilfs_sufile_truncate_range(struct inode *sufile, + __u64 start, __u64 end) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su, *su2; + size_t susz = NILFS_MDT(sufile)->mi_entry_size; + unsigned long segusages_per_block; + unsigned long nsegs, ncleaned; + __u64 segnum; + void *kaddr; + ssize_t n, nc; + int ret; + int j; + + nsegs = nilfs_sufile_get_nsegments(sufile); + + ret = -EINVAL; + if (start > end || start >= nsegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); + ncleaned = 0; + + for (segnum = start; segnum <= end; segnum += n) { + n = min_t(unsigned long, + segusages_per_block - + nilfs_sufile_get_offset(sufile, segnum), + end - segnum + 1); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_header; + /* hole */ + continue; + } + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + su2 = su; + for (j = 0; j < n; j++, su = (void *)su + susz) { + if ((le32_to_cpu(su->su_flags) & + ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || + nilfs_segment_is_active(nilfs, segnum + j)) { + ret = -EBUSY; + kunmap_atomic(kaddr); + brelse(su_bh); + goto out_header; + } + } + nc = 0; + for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { + if (nilfs_segment_usage_error(su)) { + nilfs_segment_usage_set_clean(su); + nc++; + } + } + kunmap_atomic(kaddr); + if (nc > 0) { + mark_buffer_dirty(su_bh); + ncleaned += nc; + } + brelse(su_bh); + + if (n == segusages_per_block) { + /* make hole */ + nilfs_sufile_delete_segment_usage_block(sufile, segnum); + } + } + ret = 0; + +out_header: + if (ncleaned > 0) { + NILFS_SUI(sufile)->ncleansegs += ncleaned; + nilfs_sufile_mod_counter(header_bh, ncleaned, 0); + nilfs_mdt_mark_dirty(sufile); + } + brelse(header_bh); +out: + return ret; +} + +/** + * nilfs_sufile_resize - resize segment array + * @sufile: inode of segment usage file + * @newnsegs: new number of segments + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOSPC - Enough free space is not left for shrinking + * + * %-EBUSY - Dirty or active segments exist in the region to be truncated + */ +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + void *kaddr; + unsigned long nsegs, nrsvsegs; + int ret = 0; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nsegs = nilfs_sufile_get_nsegments(sufile); + if (nsegs == newnsegs) + goto out; + + ret = -ENOSPC; + nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs); + if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + if (newnsegs > nsegs) { + sui->ncleansegs += newnsegs - nsegs; + } else /* newnsegs < nsegs */ { + ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1); + if (ret < 0) + goto out_header; + + sui->ncleansegs -= nsegs - newnsegs; + } + + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); + header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); + kunmap_atomic(kaddr); + + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); + nilfs_set_nsegments(nilfs, newnsegs); + +out_header: + brelse(header_bh); +out: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; } /** * nilfs_sufile_get_suinfo - * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @si: array of suinfo + * @buf: array of suinfo + * @sisz: byte size of suinfo * @nsi: size of suinfo array * * Description: @@ -502,13 +811,14 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * * %-ENOMEM - Insufficient amount of memory available. */ -ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, - struct nilfs_suinfo *si, size_t nsi) +ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, + unsigned sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; + struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -531,23 +841,25 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, if (ret != -ENOENT) goto out; /* hole */ - memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); + memset(si, 0, sisz * n); + si = (void *)si + sisz * n; continue; } - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); - for (j = 0; j < n; j++, su = (void *)su + susz) { - si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); - si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); - si[i + j].sui_flags = le32_to_cpu(su->su_flags) & + for (j = 0; j < n; + j++, su = (void *)su + susz, si = (void *)si + sisz) { + si->sui_lastmod = le64_to_cpu(su->su_lastmod); + si->sui_nblocks = le32_to_cpu(su->su_nblocks); + si->sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) - si[i + j].sui_flags |= + si->sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } ret = nsegs; @@ -556,3 +868,355 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } + +/** + * nilfs_sufile_set_suinfo - sets segment usage info + * @sufile: inode of segment usage file + * @buf: array of suinfo_update + * @supsz: byte size of suinfo_update + * @nsup: size of suinfo_update array + * + * Description: Takes an array of nilfs_suinfo_update structs and updates + * segment usage accordingly. Only the fields indicated by the sup_flags + * are updated. + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + */ +ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, + unsigned supsz, size_t nsup) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh, *bh; + struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; + struct nilfs_segment_usage *su; + void *kaddr; + unsigned long blkoff, prev_blkoff; + int cleansi, cleansu, dirtysi, dirtysu; + long ncleaned = 0, ndirtied = 0; + int ret = 0; + + if (unlikely(nsup == 0)) + return ret; + + for (sup = buf; sup < supend; sup = (void *)sup + supsz) { + if (sup->sup_segnum >= nilfs->ns_nsegments + || (sup->sup_flags & + (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) + || (nilfs_suinfo_update_nblocks(sup) && + sup->sup_sui.sui_nblocks > + nilfs->ns_blocks_per_segment)) + return -EINVAL; + } + + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + sup = buf; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, sup->sup_segnum, bh, kaddr); + + if (nilfs_suinfo_update_lastmod(sup)) + su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); + + if (nilfs_suinfo_update_nblocks(sup)) + su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); + + if (nilfs_suinfo_update_flags(sup)) { + /* + * Active flag is a virtual flag projected by running + * nilfs kernel code - drop it not to write it to + * disk. + */ + sup->sup_sui.sui_flags &= + ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); + + cleansi = nilfs_suinfo_clean(&sup->sup_sui); + cleansu = nilfs_segment_usage_clean(su); + dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); + dirtysu = nilfs_segment_usage_dirty(su); + + if (cleansi && !cleansu) + ++ncleaned; + else if (!cleansi && cleansu) + --ncleaned; + + if (dirtysi && !dirtysu) + ++ndirtied; + else if (!dirtysi && dirtysu) + --ndirtied; + + su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); + } + + kunmap_atomic(kaddr); + + sup = (void *)sup + supsz; + if (sup >= supend) + break; + + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + mark_buffer_dirty(bh); + put_bh(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (unlikely(ret < 0)) + goto out_mark; + } + mark_buffer_dirty(bh); + put_bh(bh); + + out_mark: + if (ncleaned || ndirtied) { + nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, + (u64)ndirtied); + NILFS_SUI(sufile)->ncleansegs += ncleaned; + } + nilfs_mdt_mark_dirty(sufile); + out_header: + put_bh(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_trim_fs() - trim ioctl handle function + * @sufile: inode of segment usage file + * @range: fstrim_range structure + * + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + * + * Decription: nilfs_sufile_trim_fs goes through all segments containing bytes + * from start to start+len. start is rounded up to the next block boundary + * and start+len is rounded down. For each clean segment blkdev_issue_discard + * function is invoked. + * + * Return Value: On success, 0 is returned or negative error code, otherwise. + */ +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su; + void *kaddr; + size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; + sector_t seg_start, seg_end, start_block, end_block; + sector_t start = 0, nblocks = 0; + u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; + int ret = 0; + unsigned int sects_per_block; + + sects_per_block = (1 << nilfs->ns_blocksize_bits) / + bdev_logical_block_size(nilfs->ns_bdev); + len = range->len >> nilfs->ns_blocksize_bits; + minlen = range->minlen >> nilfs->ns_blocksize_bits; + max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); + + if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) + return -EINVAL; + + start_block = (range->start + nilfs->ns_blocksize - 1) >> + nilfs->ns_blocksize_bits; + + /* + * range->len can be very large (actually, it is set to + * ULLONG_MAX by default) - truncate upper end of the range + * carefully so as not to overflow. + */ + if (max_blocks - start_block < len) + end_block = max_blocks - 1; + else + end_block = start_block + len - 1; + + segnum = nilfs_get_segnum_of_block(nilfs, start_block); + segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); + + down_read(&NILFS_MDT(sufile)->mi_sem); + + while (segnum <= segnum_end) { + n = nilfs_sufile_segment_usages_in_block(sufile, segnum, + segnum_end); + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_sem; + /* hole */ + segnum += n; + continue; + } + + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, + su_bh, kaddr); + for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { + if (!nilfs_segment_usage_clean(su)) + continue; + + nilfs_get_segment_range(nilfs, segnum, &seg_start, + &seg_end); + + if (!nblocks) { + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + continue; + } + + if (start + nblocks == seg_start) { + /* add to previous extent */ + nblocks += seg_end - seg_start + 1; + continue; + } + + /* discard previous extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + + if (nblocks >= minlen) { + kunmap_atomic(kaddr); + + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + if (ret < 0) { + put_bh(su_bh); + goto out_sem; + } + + ndiscarded += nblocks; + kaddr = kmap_atomic(su_bh->b_page); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + } + + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + } + kunmap_atomic(kaddr); + put_bh(su_bh); + } + + + if (nblocks) { + /* discard last extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + if (start + nblocks > end_block + 1) + nblocks = end_block - start + 1; + + if (nblocks >= minlen) { + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + if (!ret) + ndiscarded += nblocks; + } + } + +out_sem: + up_read(&NILFS_MDT(sufile)->mi_sem); + + range->len = ndiscarded << nilfs->ns_blocksize_bits; + return ret; +} + +/** + * nilfs_sufile_read - read or get sufile inode + * @sb: super block instance + * @susize: size of a segment usage entry + * @raw_inode: on-disk sufile inode + * @inodep: buffer to store the inode + */ +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep) +{ + struct inode *sufile; + struct nilfs_sufile_info *sui; + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + void *kaddr; + int err; + + if (susize > sb->s_blocksize) { + printk(KERN_ERR + "NILFS: too large segment usage size: %zu bytes.\n", + susize); + return -EINVAL; + } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { + printk(KERN_ERR + "NILFS: too small segment usage size: %zu bytes.\n", + susize); + return -EINVAL; + } + + sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); + if (unlikely(!sufile)) + return -ENOMEM; + if (!(sufile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); + if (err) + goto failed; + + nilfs_mdt_set_entry_size(sufile, susize, + sizeof(struct nilfs_sufile_header)); + + err = nilfs_read_inode_common(sufile, raw_inode); + if (err) + goto failed; + + err = nilfs_sufile_get_header_block(sufile, &header_bh); + if (err) + goto failed; + + sui = NILFS_SUI(sufile); + kaddr = kmap_atomic(header_bh->b_page); + header = kaddr + bh_offset(header_bh); + sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); + kunmap_atomic(kaddr); + brelse(header_bh); + + sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; + sui->allocmin = 0; + + unlock_new_inode(sufile); + out: + *inodep = sufile; + return 0; + failed: + iget_failed(sufile); + return err; +} diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a2e2efd4ade..b8afd72f237 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -28,56 +28,45 @@ #include <linux/nilfs2_fs.h> #include "mdt.h" -#define NILFS_SUFILE_GFP NILFS_MDT_GFP static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) { - return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; + return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments; } +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); + +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_get_segment_usage(struct inode *, __u64, - struct nilfs_segment_usage **, - struct buffer_head **); -void nilfs_sufile_put_segment_usage(struct inode *, __u64, - struct buffer_head *); +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); -int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, +ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); +ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned , size_t); +int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); int nilfs_sufile_update(struct inode *, __u64, int, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)); -void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, - struct buffer_head *); void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, struct buffer_head *); void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); -/** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_update(sufile, segnum, 0, - nilfs_sufile_do_cancel_free); -} +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep); +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); /** * nilfs_sufile_scrap - make a segment garbage @@ -100,6 +89,38 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) } /** + * nilfs_sufile_freev - free segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of freed segments + */ +static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, + size_t nsegs, size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_cancel_freev - reallocate freeing segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of cancelled segments + * + * Return Value: On success, 0 is returned. On error, a negative error codes + * is returned. + */ +static inline int nilfs_sufile_cancel_freev(struct inode *sufile, + __u64 *segnumv, size_t nsegs, + size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_cancel_free); +} + +/** * nilfs_sufile_set_error - mark a segment as erroneous * @sufile: inode of segment usage file * @segnum: segment number diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6989b03e97a..8c532b2ca3a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -43,18 +43,20 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> -#include <linux/random.h> #include <linux/crc32.h> -#include <linux/smp_lock.h> #include <linux/vfs.h> #include <linux/writeback.h> -#include <linux/kobject.h> -#include <linux/exportfs.h> +#include <linux/seq_file.h> +#include <linux/mount.h> #include "nilfs.h" +#include "export.h" #include "mdt.h" #include "alloc.h" +#include "btree.h" +#include "btnode.h" #include "page.h" #include "cpfile.h" +#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" @@ -65,9 +67,32 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); +static struct kmem_cache *nilfs_inode_cachep; +struct kmem_cache *nilfs_transaction_cachep; +struct kmem_cache *nilfs_segbuf_cachep; +struct kmem_cache *nilfs_btree_path_cache; + +static int nilfs_setup_super(struct super_block *sb, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags); + +static void nilfs_set_error(struct super_block *sb) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + + down_write(&nilfs->ns_sem); + if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { + nilfs->ns_mount_state |= NILFS_ERROR_FS; + sbp = nilfs_prepare_super(sb, 0); + if (likely(sbp)) { + sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); + if (sbp[1]) + sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); + nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + } + } + up_write(&nilfs->ns_sem); +} /** * nilfs_error() - report failure condition on a filesystem @@ -84,37 +109,30 @@ static int test_exclusive_mount(struct file_system_type *fs_type, void nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); - vprintk(fmt, args); - printk("\n"); - va_end(args); - if (!(sb->s_flags & MS_RDONLY)) { - struct the_nilfs *nilfs = sbi->s_nilfs; + vaf.fmt = fmt; + vaf.va = &args; - if (!nilfs_test_opt(sbi, ERRORS_CONT)) - nilfs_detach_segment_constructor(sbi); + printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", + sb->s_id, function, &vaf); - down_write(&nilfs->ns_sem); - if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { - nilfs->ns_mount_state |= NILFS_ERROR_FS; - nilfs->ns_sbp[0]->s_state |= - cpu_to_le16(NILFS_ERROR_FS); - nilfs_commit_super(sbi, 1); - } - up_write(&nilfs->ns_sem); + va_end(args); + + if (!(sb->s_flags & MS_RDONLY)) { + nilfs_set_error(sb); - if (nilfs_test_opt(sbi, ERRORS_RO)) { + if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } } - if (nilfs_test_opt(sbi, ERRORS_PANIC)) + if (nilfs_test_opt(nilfs, ERRORS_PANIC)) panic("NILFS (device %s): panic forced after error\n", sb->s_id); } @@ -122,17 +140,20 @@ void nilfs_error(struct super_block *sb, const char *function, void nilfs_warning(struct super_block *sb, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_WARNING "NILFS warning (device %s): %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); } -static struct kmem_cache *nilfs_inode_cachep; struct inode *nilfs_alloc_inode(struct super_block *sb) { @@ -143,103 +164,61 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_cno = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache); + nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); return &ii->vfs_inode; } -void nilfs_destroy_inode(struct inode *inode) -{ - kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); -} - -static void init_once(void *obj) -{ - struct nilfs_inode_info *ii = obj; - - INIT_LIST_HEAD(&ii->i_dirty); -#ifdef CONFIG_NILFS_XATTR - init_rwsem(&ii->xattr_sem); -#endif - nilfs_btnode_cache_init_once(&ii->i_btnode_cache); - ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; - inode_init_once(&ii->vfs_inode); -} - -static int nilfs_init_inode_cache(void) +static void nilfs_i_callback(struct rcu_head *head) { - nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", - sizeof(struct nilfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, - init_once); + struct inode *inode = container_of(head, struct inode, i_rcu); + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); - return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0; -} - -static inline void nilfs_destroy_inode_cache(void) -{ - kmem_cache_destroy(nilfs_inode_cachep); + if (mdi) { + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ + kfree(mdi); + } + kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } -static void nilfs_clear_inode(struct inode *inode) +void nilfs_destroy_inode(struct inode *inode) { - struct nilfs_inode_info *ii = NILFS_I(inode); - -#ifdef CONFIG_NILFS_POSIX_ACL - if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_acl); - ii->i_acl = NILFS_ACL_NOT_CACHED; - } - if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_default_acl); - ii->i_default_acl = NILFS_ACL_NOT_CACHED; - } -#endif - /* - * Free resources allocated in nilfs_read_inode(), here. - */ - BUG_ON(!list_empty(&ii->i_dirty)); - brelse(ii->i_bh); - ii->i_bh = NULL; - - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); - - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + call_rcu(&inode->i_rcu, nilfs_i_callback); } -static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) +static int nilfs_sync_super(struct super_block *sb, int flag) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + if (nilfs_test_opt(nilfs, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_FLUSH_FUA); + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); if (err == -EIO && nilfs->ns_sbh[1]) { + /* + * sbp[0] points to newer log than sbp[1], + * so copy sbp[0] to sbp[1] to take over sbp[0]. + */ + memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], + nilfs->ns_sbsize); nilfs_fall_back_super_block(nilfs); goto retry; } } else { struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; + nilfs->ns_sbwcount++; + /* * The latest segment becomes trailable from the position * written in superblock. @@ -248,169 +227,317 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) /* update GC protection for recent segments */ if (nilfs->ns_sbh[1]) { - sbp = NULL; - if (dupsb) { + if (flag == NILFS_SB_COMMIT_ALL) { set_buffer_dirty(nilfs->ns_sbh[1]); - if (!sync_dirty_buffer(nilfs->ns_sbh[1])) - sbp = nilfs->ns_sbp[1]; + if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) + goto out; } + if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < + le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) + sbp = nilfs->ns_sbp[1]; } - if (sbp) { - spin_lock(&nilfs->ns_last_segment_lock); - nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); - spin_unlock(&nilfs->ns_last_segment_lock); - } - } + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); + } + out: return err; } -int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) +void nilfs_set_log_cursor(struct nilfs_super_block *sbp, + struct the_nilfs *nilfs) { - struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block **sbp = nilfs->ns_sbp; sector_t nfreeblocks; - time_t t; - int err; - /* nilfs->sem must be locked by the caller. */ - if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { - if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) - nilfs_swap_super_block(nilfs); - else { + /* nilfs->ns_sem must be locked by the caller. */ + nilfs_count_free_blocks(nilfs, &nfreeblocks); + sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); + + spin_lock(&nilfs->ns_last_segment_lock); + sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); + sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); + sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); + spin_unlock(&nilfs->ns_last_segment_lock); +} + +struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, + int flip) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp = nilfs->ns_sbp; + + /* nilfs->ns_sem must be locked by the caller. */ + if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { + if (sbp[1] && + sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { + memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); + } else { printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", - sbi->s_super->s_id); - return -EIO; + sb->s_id); + return NULL; } + } else if (sbp[1] && + sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } - err = nilfs_count_free_blocks(nilfs, &nfreeblocks); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: failed to count free blocks\n"); - return err; - } - spin_lock(&nilfs->ns_last_segment_lock); - sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); - sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); - sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); - spin_unlock(&nilfs->ns_last_segment_lock); + if (flip && sbp[1]) + nilfs_swap_super_block(nilfs); + + return sbp; +} + +int nilfs_commit_super(struct super_block *sb, int flag) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp = nilfs->ns_sbp; + time_t t; + + /* nilfs->ns_sem must be locked by the caller. */ t = get_seconds(); - nilfs->ns_sbwtime[0] = t; - sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); + nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[0], nilfs->ns_sbsize)); - if (dupsb && sbp[1]) { - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); - nilfs->ns_sbwtime[1] = t; + if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { + sbp[1]->s_wtime = sbp[0]->s_wtime; + sbp[1]->s_sum = 0; + sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, + (unsigned char *)sbp[1], + nilfs->ns_sbsize)); } - sbi->s_super->s_dirt = 0; - return nilfs_sync_super(sbi, dupsb); + clear_nilfs_sb_dirty(nilfs); + return nilfs_sync_super(sb, flag); } -static void nilfs_put_super(struct super_block *sb) +/** + * nilfs_cleanup_super() - write filesystem state for cleanup + * @sb: super block instance to be unmounted or degraded to read-only + * + * This function restores state flags in the on-disk super block. + * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the + * filesystem was not clean previously. + */ +int nilfs_cleanup_super(struct super_block *sb) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + int flag = NILFS_SB_COMMIT; + int ret = -EIO; + + sbp = nilfs_prepare_super(sb, 0); + if (sbp) { + sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + nilfs_set_log_cursor(sbp[0], nilfs); + if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { + /* + * make the "clean" flag also to the opposite + * super block if both super blocks point to + * the same checkpoint. + */ + sbp[1]->s_state = sbp[0]->s_state; + flag = NILFS_SB_COMMIT_ALL; + } + ret = nilfs_commit_super(sb, flag); + } + return ret; +} - nilfs_detach_segment_constructor(sbi); +/** + * nilfs_move_2nd_super - relocate secondary super block + * @sb: super block instance + * @sb2off: new offset of the secondary super block (in bytes) + */ +static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct buffer_head *nsbh; + struct nilfs_super_block *nsbp; + sector_t blocknr, newblocknr; + unsigned long offset; + int sb2i = -1; /* array index of the secondary superblock */ + int ret = 0; - if (!(sb->s_flags & MS_RDONLY)) { - down_write(&nilfs->ns_sem); - nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); - nilfs_commit_super(sbi, 1); - up_write(&nilfs->ns_sem); + /* nilfs->ns_sem must be locked by the caller. */ + if (nilfs->ns_sbh[1] && + nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 1; + blocknr = nilfs->ns_sbh[1]->b_blocknr; + } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 0; + blocknr = nilfs->ns_sbh[0]->b_blocknr; } - - nilfs_detach_checkpoint(sbi); - put_nilfs(sbi->s_nilfs); - sbi->s_super = NULL; - sb->s_fs_info = NULL; - kfree(sbi); + if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) + goto out; /* super block location is unchanged */ + + /* Get new super block buffer */ + newblocknr = sb2off >> nilfs->ns_blocksize_bits; + offset = sb2off & (nilfs->ns_blocksize - 1); + nsbh = sb_getblk(sb, newblocknr); + if (!nsbh) { + printk(KERN_WARNING + "NILFS warning: unable to move secondary superblock " + "to block %llu\n", (unsigned long long)newblocknr); + ret = -EIO; + goto out; + } + nsbp = (void *)nsbh->b_data + offset; + memset(nsbp, 0, nilfs->ns_blocksize); + + if (sb2i >= 0) { + memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + brelse(nilfs->ns_sbh[sb2i]); + nilfs->ns_sbh[sb2i] = nsbh; + nilfs->ns_sbp[sb2i] = nsbp; + } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { + /* secondary super block will be restored to index 1 */ + nilfs->ns_sbh[1] = nsbh; + nilfs->ns_sbp[1] = nsbp; + } else { + brelse(nsbh); + } +out: + return ret; } /** - * nilfs_write_super - write super block(s) of NILFS - * @sb: super_block - * - * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and - * clears s_dirt. This function is called in the section protected by - * lock_super(). - * - * The s_dirt flag is managed by each filesystem and we protect it by ns_sem - * of the struct the_nilfs. Lock order must be as follows: - * - * 1. lock_super() - * 2. down_write(&nilfs->ns_sem) - * - * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer - * of the super block (nilfs->ns_sbp[]). - * - * In most cases, VFS functions call lock_super() before calling these - * methods. So we must be careful not to bring on deadlocks when using - * lock_super(); see generic_shutdown_super(), write_super(), and so on. - * - * Note that order of lock_kernel() and lock_super() depends on contexts - * of VFS. We should also note that lock_kernel() can be used in its - * protective section and only the outermost one has an effect. + * nilfs_resize_fs - resize the filesystem + * @sb: super block instance + * @newsize: new size of the filesystem (in bytes) */ -static void nilfs_write_super(struct super_block *sb) +int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + __u64 devsize, newnsegs; + loff_t sb2off; + int ret; + + ret = -ERANGE; + devsize = i_size_read(sb->s_bdev->bd_inode); + if (newsize > devsize) + goto out; + + /* + * Write lock is required to protect some functions depending + * on the number of segments, the number of reserved segments, + * and so forth. + */ + down_write(&nilfs->ns_segctor_sem); + + sb2off = NILFS_SB2_OFFSET_BYTES(newsize); + newnsegs = sb2off >> nilfs->ns_blocksize_bits; + do_div(newnsegs, nilfs->ns_blocks_per_segment); + + ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); + up_write(&nilfs->ns_segctor_sem); + if (ret < 0) + goto out; + + ret = nilfs_construct_segment(sb); + if (ret < 0) + goto out; down_write(&nilfs->ns_sem); - if (!(sb->s_flags & MS_RDONLY)) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u64 t = get_seconds(); - int dupsb; - - if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && - t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { - up_write(&nilfs->ns_sem); - return; - } - dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; - nilfs_commit_super(sbi, dupsb); + nilfs_move_2nd_super(sb, sb2off); + ret = -EIO; + sbp = nilfs_prepare_super(sb, 0); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + /* + * Drop NILFS_RESIZE_FS flag for compatibility with + * mount-time resize which may be implemented in a + * future release. + */ + sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & + ~NILFS_RESIZE_FS); + sbp[0]->s_dev_size = cpu_to_le64(newsize); + sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } - sb->s_dirt = 0; up_write(&nilfs->ns_sem); + + /* + * Reset the range of allocatable segments last. This order + * is important in the case of expansion because the secondary + * superblock must be protected from log write until migration + * completes. + */ + if (!ret) + nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); +out: + return ret; +} + +static void nilfs_put_super(struct super_block *sb) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + + nilfs_detach_log_writer(sb); + + if (!(sb->s_flags & MS_RDONLY)) { + down_write(&nilfs->ns_sem); + nilfs_cleanup_super(sb); + up_write(&nilfs->ns_sem); + } + + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + destroy_nilfs(nilfs); + sb->s_fs_info = NULL; } static int nilfs_sync_fs(struct super_block *sb, int wait) { + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; int err = 0; /* This function is called when super block should be written back */ if (wait) err = nilfs_construct_segment(sb); + + down_write(&nilfs->ns_sem); + if (nilfs_sb_dirty(nilfs)) { + sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs)); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + nilfs_commit_super(sb, NILFS_SB_COMMIT); + } + } + up_write(&nilfs->ns_sem); + return err; } -int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) +int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, + struct nilfs_root **rootp) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_root *root; struct nilfs_checkpoint *raw_cp; struct buffer_head *bh_cp; - int err; + int err = -ENOMEM; - down_write(&nilfs->ns_sem); - list_add(&sbi->s_list, &nilfs->ns_supers); - up_write(&nilfs->ns_sem); - - sbi->s_ifile = nilfs_mdt_new( - nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); - if (!sbi->s_ifile) - return -ENOMEM; + root = nilfs_find_or_create_root( + nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); + if (!root) + return err; - err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); - if (unlikely(err)) - goto failed; + if (root->ifile) + goto reuse; /* already attached checkpoint */ + down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, &bh_cp); + up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) { if (err == -ENOENT || err == -EINVAL) { printk(KERN_ERR @@ -421,66 +548,70 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) } goto failed; } - err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); - if (unlikely(err)) + + err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size, + &raw_cp->cp_ifile_inode, &root->ifile); + if (err) goto failed_bh; - atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + + atomic64_set(&root->inodes_count, + le64_to_cpu(raw_cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, + le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); + + reuse: + *rootp = root; return 0; failed_bh: nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; - - down_write(&nilfs->ns_sem); - list_del_init(&sbi->s_list); - up_write(&nilfs->ns_sem); + nilfs_put_root(root); return err; } -void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) +static int nilfs_freeze(struct super_block *sb) { - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; + int err; + + if (sb->s_flags & MS_RDONLY) + return 0; - nilfs_mdt_clear(sbi->s_ifile); - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; + /* Mark super block clean */ down_write(&nilfs->ns_sem); - list_del_init(&sbi->s_list); + err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); + return err; } -static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) +static int nilfs_unfreeze(struct super_block *sb) { - struct the_nilfs *nilfs = sbi->s_nilfs; - int err = 0; + struct the_nilfs *nilfs = sb->s_fs_info; + + if (sb->s_flags & MS_RDONLY) + return 0; down_write(&nilfs->ns_sem); - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - nilfs->ns_mount_state |= NILFS_VALID_FS; - err = nilfs_commit_super(sbi, 1); - if (likely(!err)) - printk(KERN_INFO "NILFS: recovery complete.\n"); - } + nilfs_setup_super(sb, false); up_write(&nilfs->ns_sem); - return err; + return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; + u64 nmaxinodes, nfreeinodes; int err; /* @@ -496,7 +627,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) /* * Compute the overhead * - * When distributing meta data blocks outside semgent structure, + * When distributing meta data blocks outside segment structure, * We must count them as the overhead. */ overhead = 0; @@ -505,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (unlikely(err)) return err; + err = nilfs_ifile_count_free_inodes(root->ifile, + &nmaxinodes, &nfreeinodes); + if (unlikely(err)) { + printk(KERN_WARNING + "NILFS warning: fail to count free inodes: err %d.\n", + err); + if (err == -ERANGE) { + /* + * If nilfs_palloc_count_max_entries() returns + * -ERANGE error code then we simply treat + * curent inodes count as maximum possible and + * zero as free inodes value. + */ + nmaxinodes = atomic64_read(&root->inodes_count); + nfreeinodes = 0; + err = 0; + } else + return err; + } + buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&sbi->s_inodes_count); - buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ + buf->f_files = nmaxinodes; + buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); @@ -520,103 +671,69 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static struct super_operations nilfs_sops = { +static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) +{ + struct super_block *sb = dentry->d_sb; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + + if (!nilfs_test_opt(nilfs, BARRIER)) + seq_puts(seq, ",nobarrier"); + if (root->cno != NILFS_CPTREE_CURRENT_CNO) + seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); + if (nilfs_test_opt(nilfs, ERRORS_PANIC)) + seq_puts(seq, ",errors=panic"); + if (nilfs_test_opt(nilfs, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); + if (nilfs_test_opt(nilfs, STRICT_ORDER)) + seq_puts(seq, ",order=strict"); + if (nilfs_test_opt(nilfs, NORECOVERY)) + seq_puts(seq, ",norecovery"); + if (nilfs_test_opt(nilfs, DISCARD)) + seq_puts(seq, ",discard"); + + return 0; +} + +static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, - /* .write_inode = nilfs_write_inode, */ - /* .put_inode = nilfs_put_inode, */ - /* .drop_inode = nilfs_drop_inode, */ - .delete_inode = nilfs_delete_inode, + .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, - .write_super = nilfs_write_super, .sync_fs = nilfs_sync_fs, - /* .write_super_lockfs */ - /* .unlockfs */ + .freeze_fs = nilfs_freeze, + .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, .remount_fs = nilfs_remount, - .clear_inode = nilfs_clear_inode, - /* .umount_begin */ - /* .show_options */ -}; - -static struct inode * -nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) -{ - struct inode *inode; - - if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && - ino != NILFS_SKETCH_INO) - return ERR_PTR(-ESTALE); - - inode = nilfs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - - return inode; -} - -static struct dentry * -nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static struct dentry * -nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static struct export_operations nilfs_export_ops = { - .fh_to_dentry = nilfs_fh_to_dentry, - .fh_to_parent = nilfs_fh_to_parent, - .get_parent = nilfs_get_parent, + .show_options = nilfs_show_options }; enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_snapshot, Opt_order, - Opt_err, + Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, Opt_nodiscard, Opt_err, }; static match_table_t tokens = { {Opt_err_cont, "errors=continue"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier=%s"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, + {Opt_norecovery, "norecovery"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL} }; -static int match_bool(substring_t *s, int *result) -{ - int len = s->to - s->from; - - if (strncmp(s->from, "on", len) == 0) - *result = 1; - else if (strncmp(s->from, "off", len) == 0) - *result = 0; - else - return 1; - return 0; -} - -static int parse_options(char *options, struct super_block *sb) +static int parse_options(char *options, struct super_block *sb, int is_remount) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; char *p; substring_t args[MAX_OPT_ARGS]; - int option; if (!options) return 1; @@ -629,39 +746,46 @@ static int parse_options(char *options, struct super_block *sb) token = match_token(p, tokens, args); switch (token) { case Opt_barrier: - if (match_bool(&args[0], &option)) - return 0; - if (option) - nilfs_set_opt(sbi, BARRIER); - else - nilfs_clear_opt(sbi, BARRIER); + nilfs_set_opt(nilfs, BARRIER); + break; + case Opt_nobarrier: + nilfs_clear_opt(nilfs, BARRIER); break; case Opt_order: if (strcmp(args[0].from, "relaxed") == 0) /* Ordered data semantics */ - nilfs_clear_opt(sbi, STRICT_ORDER); + nilfs_clear_opt(nilfs, STRICT_ORDER); else if (strcmp(args[0].from, "strict") == 0) /* Strict in-order semantics */ - nilfs_set_opt(sbi, STRICT_ORDER); + nilfs_set_opt(nilfs, STRICT_ORDER); else return 0; break; case Opt_err_panic: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); break; case Opt_err_ro: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); break; case Opt_err_cont: - nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); + nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); break; case Opt_snapshot: - if (match_int(&args[0], &option) || option <= 0) - return 0; - if (!(sb->s_flags & MS_RDONLY)) + if (is_remount) { + printk(KERN_ERR + "NILFS: \"%s\" option is invalid " + "for remount.\n", p); return 0; - sbi->s_snapshot_cno = option; - nilfs_set_opt(sbi, SNAPSHOT); + } + break; + case Opt_norecovery: + nilfs_set_opt(nilfs, NORECOVERY); + break; + case Opt_discard: + nilfs_set_opt(nilfs, DISCARD); + break; + case Opt_nodiscard: + nilfs_clear_opt(nilfs, DISCARD); break; default: printk(KERN_ERR @@ -673,24 +797,34 @@ static int parse_options(char *options, struct super_block *sb) } static inline void -nilfs_set_default_options(struct nilfs_sb_info *sbi, +nilfs_set_default_options(struct super_block *sb, struct nilfs_super_block *sbp) { - sbi->s_mount_opt = - NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; + struct the_nilfs *nilfs = sb->s_fs_info; + + nilfs->ns_mount_opt = + NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; } -static int nilfs_setup_super(struct nilfs_sb_info *sbi) +static int nilfs_setup_super(struct super_block *sb, int is_mount) { - struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; - int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); - int mnt_count = le16_to_cpu(sbp->s_mnt_count); - - /* nilfs->sem must be locked by the caller. */ - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); - } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + int max_mnt_count; + int mnt_count; + + /* nilfs->ns_sem must be locked by the caller. */ + sbp = nilfs_prepare_super(sb, 0); + if (!sbp) + return -EIO; + + if (!is_mount) + goto skip_mount_setup; + + max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); + mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); + + if (nilfs->ns_mount_state & NILFS_ERROR_FS) { printk(KERN_WARNING "NILFS warning: mounting fs with errors\n"); #if 0 @@ -700,12 +834,18 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) #endif } if (!max_mnt_count) - sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); + sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); + + sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); + sbp[0]->s_mtime = cpu_to_le64(get_seconds()); - sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); - sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); - sbp->s_mtime = cpu_to_le64(get_seconds()); - return nilfs_commit_super(sbi, 1); +skip_mount_setup: + sbp[0]->s_state = + cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); + /* synchronize sbp[1] with sbp[0] */ + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, @@ -726,7 +866,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, struct nilfs_super_block *sbp, char *data) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sb->s_fs_info; sb->s_magic = le16_to_cpu(sbp->s_magic); @@ -735,14 +875,166 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= MS_NOATIME; #endif - nilfs_set_default_options(sbi, sbp); + nilfs_set_default_options(sb, sbp); + + nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); + nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); + nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); + nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); + + return !parse_options(data, sb, 0) ? -EINVAL : 0 ; +} + +int nilfs_check_feature_compatibility(struct super_block *sb, + struct nilfs_super_block *sbp) +{ + __u64 features; + + features = le64_to_cpu(sbp->s_feature_incompat) & + ~NILFS_FEATURE_INCOMPAT_SUPP; + if (features) { + printk(KERN_ERR "NILFS: couldn't mount because of unsupported " + "optional features (%llx)\n", + (unsigned long long)features); + return -EINVAL; + } + features = le64_to_cpu(sbp->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "NILFS: couldn't mount RDWR because of " + "unsupported optional features (%llx)\n", + (unsigned long long)features); + return -EINVAL; + } + return 0; +} + +static int nilfs_get_root_dentry(struct super_block *sb, + struct nilfs_root *root, + struct dentry **root_dentry) +{ + struct inode *inode; + struct dentry *dentry; + int ret = 0; + + inode = nilfs_iget(sb, root, NILFS_ROOT_INO); + if (IS_ERR(inode)) { + printk(KERN_ERR "NILFS: get root inode failed\n"); + ret = PTR_ERR(inode); + goto out; + } + if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { + iput(inode); + printk(KERN_ERR "NILFS: corrupt root inode.\n"); + ret = -EINVAL; + goto out; + } + + if (root->cno == NILFS_CPTREE_CURRENT_CNO) { + dentry = d_find_alias(inode); + if (!dentry) { + dentry = d_make_root(inode); + if (!dentry) { + ret = -ENOMEM; + goto failed_dentry; + } + } else { + iput(inode); + } + } else { + dentry = d_obtain_alias(inode); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto failed_dentry; + } + } + *root_dentry = dentry; + out: + return ret; + + failed_dentry: + printk(KERN_ERR "NILFS: get root dentry failed\n"); + goto out; +} + +static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, + struct dentry **root_dentry) +{ + struct the_nilfs *nilfs = s->s_fs_info; + struct nilfs_root *root; + int ret; + + mutex_lock(&nilfs->ns_snapshot_mount_mutex); + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) { + ret = (ret == -ENOENT) ? -EINVAL : ret; + goto out; + } else if (!ret) { + printk(KERN_ERR "NILFS: The specified checkpoint is " + "not a snapshot (checkpoint number=%llu).\n", + (unsigned long long)cno); + ret = -EINVAL; + goto out; + } + + ret = nilfs_attach_checkpoint(s, cno, false, &root); + if (ret) { + printk(KERN_ERR "NILFS: error loading snapshot " + "(checkpoint number=%llu).\n", + (unsigned long long)cno); + goto out; + } + ret = nilfs_get_root_dentry(s, root, root_dentry); + nilfs_put_root(root); + out: + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); + return ret; +} + +/** + * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint + * @root_dentry: root dentry of the tree to be shrunk + * + * This function returns true if the tree was in-use. + */ +static bool nilfs_tree_is_busy(struct dentry *root_dentry) +{ + shrink_dcache_parent(root_dentry); + return d_count(root_dentry) > 1; +} - sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); - sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); - sbi->s_interval = le32_to_cpu(sbp->s_c_interval); - sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_root *root; + struct inode *inode; + struct dentry *dentry; + int ret; - return !parse_options(data, sb) ? -EINVAL : 0 ; + if (cno < 0 || cno > nilfs->ns_cno) + return false; + + if (cno >= nilfs_last_cno(nilfs)) + return true; /* protect recent checkpoints */ + + ret = false; + root = nilfs_lookup_root(nilfs, cno); + if (root) { + inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); + if (inode) { + dentry = d_find_alias(inode); + if (dentry) { + ret = nilfs_tree_is_busy(dentry); + dput(dentry); + } + iput(inode); + } + nilfs_put_root(root); + } + return ret; } /** @@ -750,241 +1042,165 @@ int nilfs_store_magic_and_option(struct super_block *sb, * @sb: super_block * @data: mount options * @silent: silent mode flag - * @nilfs: the_nilfs struct * - * This function is called exclusively by bd_mount_mutex. + * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent, - struct the_nilfs *nilfs) +nilfs_fill_super(struct super_block *sb, void *data, int silent) { - struct nilfs_sb_info *sbi; - struct inode *root; + struct the_nilfs *nilfs; + struct nilfs_root *fsroot; + struct backing_dev_info *bdi; __u64 cno; int err; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) + nilfs = alloc_nilfs(sb->s_bdev); + if (!nilfs) return -ENOMEM; - sb->s_fs_info = sbi; - - get_nilfs(nilfs); - sbi->s_nilfs = nilfs; - sbi->s_super = sb; + sb->s_fs_info = nilfs; - err = init_nilfs(nilfs, sbi, (char *)data); + err = init_nilfs(nilfs, sb, (char *)data); if (err) - goto failed_sbi; - - spin_lock_init(&sbi->s_inode_lock); - INIT_LIST_HEAD(&sbi->s_dirty_files); - INIT_LIST_HEAD(&sbi->s_list); - - /* - * Following initialization is overlapped because - * nilfs_sb_info structure has been cleared at the beginning. - * But we reserve them to keep our interest and make ready - * for the future change. - */ - get_random_bytes(&sbi->s_next_generation, - sizeof(sbi->s_next_generation)); - spin_lock_init(&sbi->s_next_gen_lock); + goto failed_nilfs; sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; + sb->s_max_links = NILFS_LINK_MAX; - if (!nilfs_loaded(nilfs)) { - err = load_nilfs(nilfs, sbi); - if (err) - goto failed_sbi; - } - cno = nilfs_last_cno(nilfs); + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + sb->s_bdi = bdi ? : &default_backing_dev_info; - if (sb->s_flags & MS_RDONLY) { - if (nilfs_test_opt(sbi, SNAPSHOT)) { - err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, - sbi->s_snapshot_cno); - if (err < 0) - goto failed_sbi; - if (!err) { - printk(KERN_ERR - "NILFS: The specified checkpoint is " - "not a snapshot " - "(checkpoint number=%llu).\n", - (unsigned long long)sbi->s_snapshot_cno); - err = -EINVAL; - goto failed_sbi; - } - cno = sbi->s_snapshot_cno; - } else - /* Read-only mount */ - sbi->s_snapshot_cno = cno; - } + err = load_nilfs(nilfs, sb); + if (err) + goto failed_nilfs; - err = nilfs_attach_checkpoint(sbi, cno); + cno = nilfs_last_cno(nilfs); + err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { - printk(KERN_ERR "NILFS: error loading a checkpoint" - " (checkpoint number=%llu).\n", (unsigned long long)cno); - goto failed_sbi; + printk(KERN_ERR "NILFS: error loading last checkpoint " + "(checkpoint number=%llu).\n", (unsigned long long)cno); + goto failed_unload; } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; } - root = nilfs_iget(sb, NILFS_ROOT_INO); - if (IS_ERR(root)) { - printk(KERN_ERR "NILFS: get root inode failed\n"); - err = PTR_ERR(root); - goto failed_segctor; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - iput(root); - printk(KERN_ERR "NILFS: corrupt root inode.\n"); - err = -EINVAL; - goto failed_segctor; - } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "NILFS: get root dentry failed\n"); - err = -ENOMEM; + err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); + if (err) goto failed_segctor; - } + + nilfs_put_root(fsroot); if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } - err = nilfs_mark_recovery_complete(sbi); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: recovery failed.\n"); - goto failed_root; - } - return 0; - failed_root: - dput(sb->s_root); - sb->s_root = NULL; - failed_segctor: - nilfs_detach_segment_constructor(sbi); + nilfs_detach_log_writer(sb); failed_checkpoint: - nilfs_detach_checkpoint(sbi); + nilfs_put_root(fsroot); - failed_sbi: - put_nilfs(nilfs); - sb->s_fs_info = NULL; - kfree(sbi); + failed_unload: + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + failed_nilfs: + destroy_nilfs(nilfs); return err; } static int nilfs_remount(struct super_block *sb, int *flags, char *data) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_super_block *sbp; - struct the_nilfs *nilfs = sbi->s_nilfs; + struct the_nilfs *nilfs = sb->s_fs_info; unsigned long old_sb_flags; - struct nilfs_mount_options old_opts; + unsigned long old_mount_opt; int err; + sync_filesystem(sb); old_sb_flags = sb->s_flags; - old_opts.mount_opt = sbi->s_mount_opt; - old_opts.snapshot_cno = sbi->s_snapshot_cno; + old_mount_opt = nilfs->ns_mount_opt; - if (!parse_options(data, sb)) { + if (!parse_options(data, sb, 1)) { err = -EINVAL; goto restore_opts; } sb->s_flags = (sb->s_flags & ~MS_POSIXACL); - if ((*flags & MS_RDONLY) && - sbi->s_snapshot_cno != old_opts.snapshot_cno) { + err = -EINVAL; + + if (!nilfs_valid_fs(nilfs)) { printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount to a different snapshot. \n", - sb->s_id); - err = -EINVAL; + "remount because the filesystem is in an " + "incomplete recovery state.\n", sb->s_id); goto restore_opts; } if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) goto out; if (*flags & MS_RDONLY) { - /* Shutting down the segment constructor */ - nilfs_detach_segment_constructor(sbi); + /* Shutting down log writer */ + nilfs_detach_log_writer(sb); sb->s_flags |= MS_RDONLY; - sbi->s_snapshot_cno = nilfs_last_cno(nilfs); - /* nilfs_set_opt(sbi, SNAPSHOT); */ - /* * Remounting a valid RW partition RDONLY, so set * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); - sbp = nilfs->ns_sbp[0]; - if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && - (nilfs->ns_mount_state & NILFS_VALID_FS)) - sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); - sbp->s_mtime = cpu_to_le64(get_seconds()); - nilfs_commit_super(sbi, 1); + nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } else { + __u64 features; + struct nilfs_root *root; + /* * Mounting a RDONLY partition read-write, so reread and * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) */ - down(&sb->s_bdev->bd_mount_sem); - /* Check existing RW-mount */ - if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { + down_read(&nilfs->ns_sem); + features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + up_read(&nilfs->ns_sem); + if (features) { printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount because a RW-mount exists.\n", - sb->s_id); - err = -EBUSY; - goto rw_remount_failed; - } - if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { - printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount because the current RO-mount is not " - "the latest one.\n", - sb->s_id); - err = -EINVAL; - goto rw_remount_failed; + "remount RDWR because of unsupported optional " + "features (%llx)\n", + sb->s_id, (unsigned long long)features); + err = -EROFS; + goto restore_opts; } + sb->s_flags &= ~MS_RDONLY; - nilfs_clear_opt(sbi, SNAPSHOT); - sbi->s_snapshot_cno = 0; - err = nilfs_attach_segment_constructor(sbi); + root = NILFS_I(sb->s_root->d_inode)->i_root; + err = nilfs_attach_log_writer(sb, root); if (err) - goto rw_remount_failed; + goto restore_opts; down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); - - up(&sb->s_bdev->bd_mount_sem); } out: return 0; - rw_remount_failed: - up(&sb->s_bdev->bd_mount_sem); restore_opts: sb->s_flags = old_sb_flags; - sbi->s_mount_opt = old_opts.mount_opt; - sbi->s_snapshot_cno = old_opts.snapshot_cno; + nilfs->ns_mount_opt = old_mount_opt; return err; } @@ -1003,7 +1219,7 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) { char *p, *options = data; substring_t args[MAX_OPT_ARGS]; - int option, token; + int token; int ret = 0; do { @@ -1011,16 +1227,18 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) if (p != NULL && *p) { token = match_token(p, tokens, args); if (token == Opt_snapshot) { - if (!(sd->flags & MS_RDONLY)) + if (!(sd->flags & MS_RDONLY)) { ret++; - else { - ret = match_int(&args[0], &option); - if (!ret) { - if (option > 0) - sd->cno = option; - else - ret++; - } + } else { + sd->cno = simple_strtoull(args[0].from, + NULL, 0); + /* + * No need to see the end pointer; + * match_token() has done syntax + * checking. + */ + if (sd->cno == 0) + ret++; } } if (ret) @@ -1037,66 +1255,33 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) static int nilfs_set_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - s->s_bdev = sd->bdev; + s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; return 0; } static int nilfs_test_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - return s->s_bdev == sd->bdev; -} - -static int nilfs_test_bdev_super2(struct super_block *s, void *data) -{ - struct nilfs_super_data *sd = data; - int ret; - - if (s->s_bdev != sd->bdev) - return 0; - - if (!((s->s_flags | sd->flags) & MS_RDONLY)) - return 1; /* Reuse an old R/W-mode super_block */ - - if (s->s_flags & sd->flags & MS_RDONLY) { - if (down_read_trylock(&s->s_umount)) { - ret = s->s_root && - (sd->cno == NILFS_SB(s)->s_snapshot_cno); - up_read(&s->s_umount); - /* - * This path is locked with sb_lock by sget(). - * So, drop_super() causes deadlock. - */ - return ret; - } - } - return 0; + return (void *)s->s_bdev == data; } -static int -nilfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry * +nilfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { struct nilfs_super_data sd; - struct super_block *s, *s2; - struct the_nilfs *nilfs = NULL; - int err, need_to_close = 1; + struct super_block *s; + fmode_t mode = FMODE_READ | FMODE_EXCL; + struct dentry *root_dentry; + int err, s_new = false; - sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(sd.bdev)) - return PTR_ERR(sd.bdev); + return ERR_CAST(sd.bdev); - /* - * To get mount instance using sget() vfs-routine, NILFS needs - * much more information than normal filesystems to identify mount - * instance. For snapshot mounts, not only a mount type (ro-mount - * or rw-mount) but also a checkpoint number is required. - * The results are passed in sget() using nilfs_super_data. - */ sd.cno = 0; sd.flags = flags; if (nilfs_identify((char *)data, &sd)) { @@ -1109,216 +1294,180 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&sd.bdev->bd_mount_sem); - if (!sd.cno && - (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { - err = (err < 0) ? : -EBUSY; - goto failed_unlock; + mutex_lock(&sd.bdev->bd_fsfreeze_mutex); + if (sd.bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); + err = -EBUSY; + goto failed; } - - /* - * Phase-1: search any existent instance and get the_nilfs - */ - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); - if (IS_ERR(s)) - goto error_s; - - if (!s->s_root) { - err = -ENOMEM; - nilfs = alloc_nilfs(sd.bdev); - if (!nilfs) - goto cancel_new; - } else { - struct nilfs_sb_info *sbi = NILFS_SB(s); - - /* - * s_umount protects super_block from unmount process; - * It covers pointers of nilfs_sb_info and the_nilfs. - */ - nilfs = sbi->s_nilfs; - get_nilfs(nilfs); - up_write(&s->s_umount); - - /* - * Phase-2: search specified snapshot or R/W mode super_block - */ - if (!sd.cno) - /* trying to get the latest checkpoint. */ - sd.cno = nilfs_last_cno(nilfs); - - s2 = sget(fs_type, nilfs_test_bdev_super2, - nilfs_set_bdev_super, &sd); - deactivate_super(s); - /* - * Although deactivate_super() invokes close_bdev_exclusive() at - * kill_block_super(). Here, s is an existent mount; we need - * one more close_bdev_exclusive() call. - */ - s = s2; - if (IS_ERR(s)) - goto error_s; + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, + sd.bdev); + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); + if (IS_ERR(s)) { + err = PTR_ERR(s); + goto failed; } if (!s->s_root) { char b[BDEVNAME_SIZE]; - s->s_flags = flags; + s_new = true; + + /* New superblock instance created */ + s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); - err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs); + err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (err) - goto cancel_new; + goto failed_super; s->s_flags |= MS_ACTIVE; - need_to_close = 0; - } else if (!(s->s_flags & MS_RDONLY)) { - err = -EBUSY; + } else if (!sd.cno) { + if (nilfs_tree_is_busy(s->s_root)) { + if ((flags ^ s->s_flags) & MS_RDONLY) { + printk(KERN_ERR "NILFS: the device already " + "has a %s mount.\n", + (s->s_flags & MS_RDONLY) ? + "read-only" : "read/write"); + err = -EBUSY; + goto failed_super; + } + } else { + /* + * Try remount to setup mount states if the current + * tree is not mounted and only snapshots use this sb. + */ + err = nilfs_remount(s, &flags, data); + if (err) + goto failed_super; + } } - up(&sd.bdev->bd_mount_sem); - put_nilfs(nilfs); - if (need_to_close) - close_bdev_exclusive(sd.bdev, flags); - simple_set_mnt(mnt, s); - return 0; + if (sd.cno) { + err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + if (err) + goto failed_super; + } else { + root_dentry = dget(s->s_root); + } - error_s: - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); - close_bdev_exclusive(sd.bdev, flags); - return PTR_ERR(s); + if (!s_new) + blkdev_put(sd.bdev, mode); - failed_unlock: - up(&sd.bdev->bd_mount_sem); - failed: - close_bdev_exclusive(sd.bdev, flags); + return root_dentry; - return err; + failed_super: + deactivate_locked_super(s); - cancel_new: - /* Abandoning the newly allocated superblock */ - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); - up_write(&s->s_umount); - deactivate_super(s); - /* - * deactivate_super() invokes close_bdev_exclusive(). - * We must finish all post-cleaning before this call; - * put_nilfs() and unlocking bd_mount_sem need the block device. - */ - return err; + failed: + if (!s_new) + blkdev_put(sd.bdev, mode); + return ERR_PTR(err); } -static int nilfs_test_bdev_super3(struct super_block *s, void *data) +struct file_system_type nilfs_fs_type = { + .owner = THIS_MODULE, + .name = "nilfs2", + .mount = nilfs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("nilfs2"); + +static void nilfs_inode_init_once(void *obj) { - struct nilfs_super_data *sd = data; - int ret; + struct nilfs_inode_info *ii = obj; - if (s->s_bdev != sd->bdev) - return 0; - if (down_read_trylock(&s->s_umount)) { - ret = (s->s_flags & MS_RDONLY) && s->s_root && - nilfs_test_opt(NILFS_SB(s), SNAPSHOT); - up_read(&s->s_umount); - if (ret) - return 0; /* ignore snapshot mounts */ - } - return !((sd->flags ^ s->s_flags) & MS_RDONLY); + INIT_LIST_HEAD(&ii->i_dirty); +#ifdef CONFIG_NILFS_XATTR + init_rwsem(&ii->xattr_sem); +#endif + address_space_init_once(&ii->i_btnode_cache); + ii->i_bmap = &ii->i_bmap_data; + inode_init_once(&ii->vfs_inode); } -static int __false_bdev_super(struct super_block *s, void *data) +static void nilfs_segbuf_init_once(void *obj) { -#if 0 /* XXX: workaround for lock debug. This is not good idea */ - up_write(&s->s_umount); -#endif - return -EFAULT; + memset(obj, 0, sizeof(struct nilfs_segment_buffer)); } -/** - * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. - * fs_type: filesystem type - * bdev: block device - * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) - * res: pointer to an integer to store result - * - * This function must be called within a section protected by bd_mount_mutex. - */ -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags) +static void nilfs_destroy_cachep(void) { - struct super_block *s; - struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; - - s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); - if (IS_ERR(s)) { - if (PTR_ERR(s) != -EFAULT) - return PTR_ERR(s); - return 0; /* Not found */ - } - up_write(&s->s_umount); - deactivate_super(s); - return 1; /* Found */ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + + if (nilfs_inode_cachep) + kmem_cache_destroy(nilfs_inode_cachep); + if (nilfs_transaction_cachep) + kmem_cache_destroy(nilfs_transaction_cachep); + if (nilfs_segbuf_cachep) + kmem_cache_destroy(nilfs_segbuf_cachep); + if (nilfs_btree_path_cache) + kmem_cache_destroy(nilfs_btree_path_cache); } -struct file_system_type nilfs_fs_type = { - .owner = THIS_MODULE, - .name = "nilfs2", - .get_sb = nilfs_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static int __init init_nilfs_fs(void) +static int __init nilfs_init_cachep(void) { - int err; + nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", + sizeof(struct nilfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once); + if (!nilfs_inode_cachep) + goto fail; + + nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache", + sizeof(struct nilfs_transaction_info), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (!nilfs_transaction_cachep) + goto fail; + + nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache", + sizeof(struct nilfs_segment_buffer), 0, + SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once); + if (!nilfs_segbuf_cachep) + goto fail; + + nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache", + sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX, + 0, 0, NULL); + if (!nilfs_btree_path_cache) + goto fail; - err = nilfs_init_inode_cache(); - if (err) - goto failed; + return 0; - err = nilfs_init_transaction_cache(); - if (err) - goto failed_inode_cache; +fail: + nilfs_destroy_cachep(); + return -ENOMEM; +} - err = nilfs_init_segbuf_cache(); - if (err) - goto failed_transaction_cache; +static int __init init_nilfs_fs(void) +{ + int err; - err = nilfs_btree_path_cache_init(); + err = nilfs_init_cachep(); if (err) - goto failed_segbuf_cache; + goto fail; err = register_filesystem(&nilfs_fs_type); if (err) - goto failed_btree_path_cache; + goto free_cachep; + printk(KERN_INFO "NILFS version 2 loaded\n"); return 0; - failed_btree_path_cache: - nilfs_btree_path_cache_destroy(); - - failed_segbuf_cache: - nilfs_destroy_segbuf_cache(); - - failed_transaction_cache: - nilfs_destroy_transaction_cache(); - - failed_inode_cache: - nilfs_destroy_inode_cache(); - - failed: +free_cachep: + nilfs_destroy_cachep(); +fail: return err; } static void __exit exit_nilfs_fs(void) { - nilfs_destroy_segbuf_cache(); - nilfs_destroy_transaction_cache(); - nilfs_destroy_inode_cache(); - nilfs_btree_path_cache_destroy(); + nilfs_destroy_cachep(); unregister_filesystem(&nilfs_fs_type); } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a91f15b8673..8ba8229ba07 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/random.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" @@ -32,9 +33,11 @@ #include "cpfile.h" #include "sufile.h" #include "dat.h" -#include "seglist.h" #include "segbuf.h" + +static int nilfs_valid_sb(struct nilfs_super_block *sbp); + void nilfs_set_last_segment(struct the_nilfs *nilfs, sector_t start_blocknr, u64 seq, __u64 cno) { @@ -42,16 +45,23 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, nilfs->ns_last_pseg = start_blocknr; nilfs->ns_last_seq = seq; nilfs->ns_last_cno = cno; + + if (!nilfs_sb_dirty(nilfs)) { + if (nilfs->ns_prev_seq == nilfs->ns_last_seq) + goto stay_cursor; + + set_nilfs_sb_dirty(nilfs); + } + nilfs->ns_prev_seq = nilfs->ns_last_seq; + + stay_cursor: spin_unlock(&nilfs->ns_last_segment_lock); } /** - * alloc_nilfs - allocate the_nilfs structure + * alloc_nilfs - allocate a nilfs object * @bdev: block device to which the_nilfs is related * - * alloc_nilfs() allocates memory for the_nilfs and - * initializes its reference count and locks. - * * Return Value: On success, pointer to the_nilfs is returned. * On error, NULL is returned. */ @@ -64,48 +74,29 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) return NULL; nilfs->ns_bdev = bdev; - atomic_set(&nilfs->ns_count, 1); - atomic_set(&nilfs->ns_writer_refcount, -1); atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); - mutex_init(&nilfs->ns_writer_mutex); - INIT_LIST_HEAD(&nilfs->ns_supers); + mutex_init(&nilfs->ns_snapshot_mount_mutex); + INIT_LIST_HEAD(&nilfs->ns_dirty_files); + INIT_LIST_HEAD(&nilfs->ns_gc_inodes); + spin_lock_init(&nilfs->ns_inode_lock); + spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); - nilfs->ns_gc_inodes_h = NULL; + nilfs->ns_cptree = RB_ROOT; + spin_lock_init(&nilfs->ns_cptree_lock); init_rwsem(&nilfs->ns_segctor_sem); return nilfs; } /** - * put_nilfs - release a reference to the_nilfs - * @nilfs: the_nilfs structure to be released - * - * put_nilfs() decrements a reference counter of the_nilfs. - * If the reference count reaches zero, the_nilfs is freed. + * destroy_nilfs - destroy nilfs object + * @nilfs: nilfs object to be released */ -void put_nilfs(struct the_nilfs *nilfs) +void destroy_nilfs(struct the_nilfs *nilfs) { - if (!atomic_dec_and_test(&nilfs->ns_count)) - return; - /* - * Increment of ns_count never occur below because the caller - * of get_nilfs() holds at least one reference to the_nilfs. - * Thus its exclusion control is not required here. - */ might_sleep(); - if (nilfs_loaded(nilfs)) { - nilfs_mdt_clear(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_dat); - nilfs_mdt_destroy(nilfs->ns_dat); - /* XXX: how and when to clear nilfs->ns_gc_dat? */ - nilfs_mdt_destroy(nilfs->ns_gc_dat); - } if (nilfs_init(nilfs)) { - nilfs_destroy_gccache(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -113,17 +104,17 @@ void put_nilfs(struct the_nilfs *nilfs) } static int nilfs_load_super_root(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, sector_t sr_block) + struct super_block *sb, sector_t sr_block) { - static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_inode *rawi; unsigned dat_entry_size, segment_usage_size, checkpoint_size; unsigned inode_size; int err; - err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); + err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1); if (unlikely(err)) return err; @@ -135,59 +126,22 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, inode_size = nilfs->ns_inode_size; - err = -ENOMEM; - nilfs->ns_dat = nilfs_mdt_new( - nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); - if (unlikely(!nilfs->ns_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_DAT_OFFSET(inode_size); + err = nilfs_dat_read(sb, dat_entry_size, rawi, &nilfs->ns_dat); + if (err) goto failed; - nilfs->ns_gc_dat = nilfs_mdt_new( - nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); - if (unlikely(!nilfs->ns_gc_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_CPFILE_OFFSET(inode_size); + err = nilfs_cpfile_read(sb, checkpoint_size, rawi, &nilfs->ns_cpfile); + if (err) goto failed_dat; - nilfs->ns_cpfile = nilfs_mdt_new( - nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP); - if (unlikely(!nilfs->ns_cpfile)) - goto failed_gc_dat; - - nilfs->ns_sufile = nilfs_mdt_new( - nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP); - if (unlikely(!nilfs->ns_sufile)) + rawi = (void *)bh_sr->b_data + NILFS_SR_SUFILE_OFFSET(inode_size); + err = nilfs_sufile_read(sb, segment_usage_size, rawi, + &nilfs->ns_sufile); + if (err) goto failed_cpfile; - err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); - lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); - - nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, - sizeof(struct nilfs_cpfile_header)); - nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size, - sizeof(struct nilfs_sufile_header)); - - err = nilfs_mdt_read_inode_direct( - nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_mdt_read_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_mdt_read_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - raw_sr = (struct nilfs_super_root *)bh_sr->b_data; nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); @@ -195,17 +149,11 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, brelse(bh_sr); return err; - failed_sufile: - nilfs_mdt_destroy(nilfs->ns_sufile); - failed_cpfile: - nilfs_mdt_destroy(nilfs->ns_cpfile); - - failed_gc_dat: - nilfs_mdt_destroy(nilfs->ns_gc_dat); + iput(nilfs->ns_cpfile); failed_dat: - nilfs_mdt_destroy(nilfs->ns_dat); + iput(nilfs->ns_dat); goto failed; } @@ -221,71 +169,187 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) } /** + * nilfs_store_log_cursor - load log cursor from a super block + * @nilfs: nilfs object + * @sbp: buffer storing super block to be read + * + * nilfs_store_log_cursor() reads the last position of the log + * containing a super root from a given super block, and initializes + * relevant information on the nilfs object preparatory for log + * scanning and recovery. + */ +static int nilfs_store_log_cursor(struct the_nilfs *nilfs, + struct nilfs_super_block *sbp) +{ + int ret = 0; + + nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); + nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); + nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); + + nilfs->ns_prev_seq = nilfs->ns_last_seq; + nilfs->ns_seg_seq = nilfs->ns_last_seq; + nilfs->ns_segnum = + nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); + nilfs->ns_cno = nilfs->ns_last_cno + 1; + if (nilfs->ns_segnum >= nilfs->ns_nsegments) { + printk(KERN_ERR "NILFS invalid last segment number.\n"); + ret = -EINVAL; + } + return ret; +} + +/** * load_nilfs - load and recover the nilfs * @nilfs: the_nilfs structure to be released - * @sbi: nilfs_sb_info used to recover past segment + * @sb: super block isntance used to recover past segment * * load_nilfs() searches and load the latest super root, * attaches the last segment, and does recovery if needed. * The caller must call this exclusively for simultaneous mounts. */ -int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_recovery_info ri; - unsigned int s_flags = sbi->s_super->s_flags; + unsigned int s_flags = sb->s_flags; int really_read_only = bdev_read_only(nilfs->ns_bdev); - unsigned valid_fs; - int err = 0; + int valid_fs = nilfs_valid_fs(nilfs); + int err; + + if (!valid_fs) { + printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); + if (s_flags & MS_RDONLY) { + printk(KERN_INFO "NILFS: INFO: recovery " + "required for readonly filesystem.\n"); + printk(KERN_INFO "NILFS: write access will " + "be enabled during recovery.\n"); + } + } nilfs_init_recovery_info(&ri); - down_write(&nilfs->ns_sem); - valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); - up_write(&nilfs->ns_sem); + err = nilfs_search_super_root(nilfs, &ri); + if (unlikely(err)) { + struct nilfs_super_block **sbp = nilfs->ns_sbp; + int blocksize; - if (!valid_fs && (s_flags & MS_RDONLY)) { - printk(KERN_INFO "NILFS: INFO: recovery " - "required for readonly filesystem.\n"); - if (really_read_only) { - printk(KERN_ERR "NILFS: write access " - "unavailable, cannot proceed.\n"); - err = -EROFS; - goto failed; + if (err != -EINVAL) + goto scan_error; + + if (!nilfs_valid_sb(sbp[1])) { + printk(KERN_WARNING + "NILFS warning: unable to fall back to spare" + "super block\n"); + goto scan_error; + } + printk(KERN_INFO + "NILFS: try rollback from an earlier position\n"); + + /* + * restore super block with its spare and reconfigure + * relevant states of the nilfs object. + */ + memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); + nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed); + nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); + + /* verify consistency between two super blocks */ + blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); + if (blocksize != nilfs->ns_blocksize) { + printk(KERN_WARNING + "NILFS warning: blocksize differs between " + "two super blocks (%d != %d)\n", + blocksize, nilfs->ns_blocksize); + goto scan_error; } - printk(KERN_INFO "NILFS: write access will " - "be enabled during recovery.\n"); - sbi->s_super->s_flags &= ~MS_RDONLY; - } - err = nilfs_search_super_root(nilfs, sbi, &ri); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: error searching super root.\n"); - goto failed; + err = nilfs_store_log_cursor(nilfs, sbp[0]); + if (err) + goto scan_error; + + /* drop clean flag to allow roll-forward and recovery */ + nilfs->ns_mount_state &= ~NILFS_VALID_FS; + valid_fs = 0; + + err = nilfs_search_super_root(nilfs, &ri); + if (err) + goto scan_error; } - err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); + err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading super root.\n"); goto failed; } - if (!valid_fs) { - err = nilfs_recover_logical_segments(nilfs, sbi, &ri); - if (unlikely(err)) { - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_dat); - goto failed; + if (valid_fs) + goto skip_recovery; + + if (s_flags & MS_RDONLY) { + __u64 features; + + if (nilfs_test_opt(nilfs, NORECOVERY)) { + printk(KERN_INFO "NILFS: norecovery option specified. " + "skipping roll-forward recovery\n"); + goto skip_recovery; + } + features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + if (features) { + printk(KERN_ERR "NILFS: couldn't proceed with " + "recovery because of unsupported optional " + "features (%llx)\n", + (unsigned long long)features); + err = -EROFS; + goto failed_unload; + } + if (really_read_only) { + printk(KERN_ERR "NILFS: write access " + "unavailable, cannot proceed.\n"); + err = -EROFS; + goto failed_unload; } - if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) - sbi->s_super->s_dirt = 1; + sb->s_flags &= ~MS_RDONLY; + } else if (nilfs_test_opt(nilfs, NORECOVERY)) { + printk(KERN_ERR "NILFS: recovery cancelled because norecovery " + "option was specified for a read/write mount\n"); + err = -EINVAL; + goto failed_unload; } - set_nilfs_loaded(nilfs); + err = nilfs_salvage_orphan_logs(nilfs, sb, &ri); + if (err) + goto failed_unload; + + down_write(&nilfs->ns_sem); + nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ + err = nilfs_cleanup_super(sb); + up_write(&nilfs->ns_sem); + + if (err) { + printk(KERN_ERR "NILFS: failed to update super block. " + "recovery unfinished.\n"); + goto failed_unload; + } + printk(KERN_INFO "NILFS: recovery complete.\n"); + + skip_recovery: + nilfs_clear_recovery_info(&ri); + sb->s_flags = s_flags; + return 0; + + scan_error: + printk(KERN_ERR "NILFS: error searching super root.\n"); + goto failed; + + failed_unload: + iput(nilfs->ns_cpfile); + iput(nilfs->ns_sufile); + iput(nilfs->ns_dat); failed: nilfs_clear_recovery_info(&ri); - sbi->s_super->s_flags = s_flags; + sb->s_flags = s_flags; return err; } @@ -300,11 +364,29 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) return res; } +/** + * nilfs_nrsvsegs - calculate the number of reserved segments + * @nilfs: nilfs object + * @nsegs: total number of segments + */ +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) +{ + return max_t(unsigned long, NILFS_MIN_NRSVSEGS, + DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage, + 100)); +} + +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) +{ + nilfs->ns_nsegments = nsegs; + nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs); +} + static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { - if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { - printk(KERN_ERR "NILFS: revision mismatch " + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { + printk(KERN_ERR "NILFS: unsupported revision " "(superblock rev.=%d.%d, current rev.=%d.%d). " "Please check the version of mkfs.nilfs.\n", le32_to_cpu(sbp->s_rev_level), @@ -317,22 +399,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); + if (nilfs->ns_inode_size > nilfs->ns_blocksize) { + printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n", + nilfs->ns_inode_size); + return -EINVAL; + } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { + printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n", + nilfs->ns_inode_size); + return -EINVAL; + } + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { - printk(KERN_ERR "NILFS: too short segment. \n"); + printk(KERN_ERR "NILFS: too short segment.\n"); return -EINVAL; } nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); - nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments); nilfs->ns_r_segments_percentage = le32_to_cpu(sbp->s_r_segments_percentage); - nilfs->ns_nrsvsegs = - max_t(unsigned long, NILFS_MIN_NRSVSEGS, - DIV_ROUND_UP(nilfs->ns_nsegments * - nilfs->ns_r_segments_percentage, 100)); + if (nilfs->ns_r_segments_percentage < 1 || + nilfs->ns_r_segments_percentage > 99) { + printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n"); + return -EINVAL; + } + + nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } @@ -416,21 +510,29 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EIO; } printk(KERN_WARNING - "NILFS warning: unable to read primary superblock\n"); - } else if (!sbp[1]) + "NILFS warning: unable to read primary superblock " + "(blocksize = %d)\n", blocksize); + } else if (!sbp[1]) { printk(KERN_WARNING - "NILFS warning: unable to read secondary superblock\n"); + "NILFS warning: unable to read secondary superblock " + "(blocksize = %d)\n", blocksize); + } + /* + * Compare two super blocks and set 1 in swp if the secondary + * super block is valid and newer. Otherwise, set 0 in swp. + */ valid[0] = nilfs_valid_sb(sbp[0]); valid[1] = nilfs_valid_sb(sbp[1]); - swp = valid[1] && - (!valid[0] || - le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime)); + swp = valid[1] && (!valid[0] || + le64_to_cpu(sbp[1]->s_last_cno) > + le64_to_cpu(sbp[0]->s_last_cno)); if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) { brelse(sbh[1]); sbh[1] = NULL; sbp[1] = NULL; + valid[1] = 0; swp = 0; } if (!valid[swp]) { @@ -440,14 +542,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " - "using spare superblock.\n"); + "using spare superblock (blocksize = %d).\n", blocksize); + if (swp) nilfs_swap_super_block(nilfs); - } - nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; + nilfs->ns_sbwcount = 0; + nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); *sbpp = sbp[0]; return 0; @@ -456,50 +558,25 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, /** * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure - * @sbi: nilfs_sb_info * @sb: super block * @data: mount options * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing - * shared fields in the_nilfs). It takes on some portion of the jobs - * typically done by a fill_super() routine. This division arises from - * the nature that multiple NILFS instances may be simultaneously - * mounted on a device. - * For multiple mounts on the same device, only the first mount - * invokes these tasks. + * shared fields in the_nilfs). * * Return Value: On success, 0 is returned. On error, a negative error * code is returned. */ -int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) { - struct super_block *sb = sbi->s_super; struct nilfs_super_block *sbp; - struct backing_dev_info *bdi; int blocksize; int err; down_write(&nilfs->ns_sem); - if (nilfs_init(nilfs)) { - /* Load values from existing the_nilfs */ - sbp = nilfs->ns_sbp[0]; - err = nilfs_store_magic_and_option(sb, sbp, data); - if (err) - goto out; - blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); - if (sb->s_blocksize != blocksize && - !sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", - blocksize); - err = -EINVAL; - } - sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); - goto out; - } - - blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { printk(KERN_ERR "NILFS: unable to set blocksize\n"); err = -EINVAL; @@ -513,7 +590,18 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) if (err) goto failed_sbh; + err = nilfs_check_feature_compatibility(sb, sbp); + if (err) + goto failed_sbh; + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (blocksize < NILFS_MIN_BLOCK_SIZE || + blocksize > NILFS_MAX_BLOCK_SIZE) { + printk(KERN_ERR "NILFS: couldn't mount because of unsupported " + "filesystem blocksize %d\n", blocksize); + err = -EINVAL; + goto failed_sbh; + } if (sb->s_blocksize != blocksize) { int hw_blocksize = bdev_logical_block_size(sb->s_bdev); @@ -535,6 +623,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) when reloading fails. */ } nilfs->ns_blocksize_bits = sb->s_blocksize_bits; + nilfs->ns_blocksize = blocksize; + + get_random_bytes(&nilfs->ns_next_generation, + sizeof(nilfs->ns_next_generation)); err = nilfs_store_disk_layout(nilfs, sbp); if (err) @@ -544,31 +636,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; - nilfs->ns_bdi = bdi ? : &default_backing_dev_info; - - /* Finding last segment */ - nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); - nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); - nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); - - nilfs->ns_seg_seq = nilfs->ns_last_seq; - nilfs->ns_segnum = - nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); - nilfs->ns_cno = nilfs->ns_last_cno + 1; - if (nilfs->ns_segnum >= nilfs->ns_nsegments) { - printk(KERN_ERR "NILFS invalid last segment number.\n"); - err = -EINVAL; - goto failed_sbh; - } - /* Dummy values */ - nilfs->ns_free_segments_count = - nilfs->ns_nsegments - (nilfs->ns_segnum + 1); - - /* Initialize gcinode cache */ - err = nilfs_init_gccache(nilfs); + err = nilfs_store_log_cursor(nilfs, sbp); if (err) goto failed_sbh; @@ -583,59 +651,151 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) goto out; } +int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, + size_t nsegs) +{ + sector_t seg_start, seg_end; + sector_t start = 0, nblocks = 0; + unsigned int sects_per_block; + __u64 *sn; + int ret = 0; + + sects_per_block = (1 << nilfs->ns_blocksize_bits) / + bdev_logical_block_size(nilfs->ns_bdev); + for (sn = segnump; sn < segnump + nsegs; sn++) { + nilfs_get_segment_range(nilfs, *sn, &seg_start, &seg_end); + + if (!nblocks) { + start = seg_start; + nblocks = seg_end - seg_start + 1; + } else if (start + nblocks == seg_start) { + nblocks += seg_end - seg_start + 1; + } else { + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + if (ret < 0) + return ret; + nblocks = 0; + } + } + if (nblocks) + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, 0); + return ret; +} + int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { - struct inode *dat = nilfs_dat_inode(nilfs); unsigned long ncleansegs; - int err; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (likely(!err)) - *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; - return err; + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return 0; } int nilfs_near_disk_full(struct the_nilfs *nilfs) { - struct inode *sufile = nilfs->ns_sufile; unsigned long ncleansegs, nincsegs; - int ret; - - ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); - if (likely(!ret)) { - nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / - nilfs->ns_blocks_per_segment + 1; - if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) - ret++; + + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); + nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / + nilfs->ns_blocks_per_segment + 1; + + return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; +} + +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) +{ + struct rb_node *n; + struct nilfs_root *root; + + spin_lock(&nilfs->ns_cptree_lock); + n = nilfs->ns_cptree.rb_node; + while (n) { + root = rb_entry(n, struct nilfs_root, rb_node); + + if (cno < root->cno) { + n = n->rb_left; + } else if (cno > root->cno) { + n = n->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + return root; + } } - return ret; + spin_unlock(&nilfs->ns_cptree_lock); + + return NULL; } -int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, - int snapshot_mount) +struct nilfs_root * +nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) { - struct nilfs_sb_info *sbi; - int ret = 0; + struct rb_node **p, *parent; + struct nilfs_root *root, *new; - down_read(&nilfs->ns_sem); - if (cno == 0 || cno > nilfs->ns_cno) - goto out_unlock; - - list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { - if (sbi->s_snapshot_cno == cno && - (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { - /* exclude read-only mounts */ - ret++; - break; + root = nilfs_lookup_root(nilfs, cno); + if (root) + return root; + + new = kmalloc(sizeof(*root), GFP_KERNEL); + if (!new) + return NULL; + + spin_lock(&nilfs->ns_cptree_lock); + + p = &nilfs->ns_cptree.rb_node; + parent = NULL; + + while (*p) { + parent = *p; + root = rb_entry(parent, struct nilfs_root, rb_node); + + if (cno < root->cno) { + p = &(*p)->rb_left; + } else if (cno > root->cno) { + p = &(*p)->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + kfree(new); + return root; } } - /* for protecting recent checkpoints */ - if (cno >= nilfs_last_cno(nilfs)) - ret++; - out_unlock: - up_read(&nilfs->ns_sem); - return ret; + new->cno = cno; + new->ifile = NULL; + new->nilfs = nilfs; + atomic_set(&new->count, 1); + atomic64_set(&new->inodes_count, 0); + atomic64_set(&new->blocks_count, 0); + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &nilfs->ns_cptree); + + spin_unlock(&nilfs->ns_cptree_lock); + + return new; +} + +void nilfs_put_root(struct nilfs_root *root) +{ + if (atomic_dec_and_test(&root->count)) { + struct the_nilfs *nilfs = root->nilfs; + + spin_lock(&nilfs->ns_cptree_lock); + rb_erase(&root->rb_node, &nilfs->ns_cptree); + spin_unlock(&nilfs->ns_cptree_lock); + if (root->ifile) + iput(root->ifile); + + kfree(root); + } } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 30fe58778d0..de8cc53b4a5 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -26,34 +26,34 @@ #include <linux/types.h> #include <linux/buffer_head.h> +#include <linux/rbtree.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include "sb.h" +#include <linux/slab.h> + +struct nilfs_sc_info; /* the_nilfs struct */ enum { THE_NILFS_INIT = 0, /* Information from super_block is set */ - THE_NILFS_LOADED, /* Roll-back/roll-forward has done and - the latest checkpoint was loaded */ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ + THE_NILFS_GC_RUNNING, /* gc process is running */ + THE_NILFS_SB_DIRTY, /* super block is dirty */ }; /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags - * @ns_count: reference count * @ns_bdev: block device - * @ns_bdi: backing dev info - * @ns_writer: back pointer to writable nilfs_sb_info * @ns_sem: semaphore for shared states - * @ns_writer_mutex: mutex protecting ns_writer attach/detach - * @ns_writer_refcount: number of referrers on ns_writer + * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data - * @ns_sbwtime: previous write time of super blocks + * @ns_sbwtime: previous write time of super block + * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block - * @ns_supers: list of nilfs super block structs + * @ns_mount_state: file system state * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next @@ -67,15 +67,26 @@ enum { * @ns_last_seq: sequence value of the latest segment * @ns_last_cno: checkpoint number of the latest segment * @ns_prot_seq: least sequence number of segments which must not be reclaimed - * @ns_free_segments_count: counter of free segments - * @ns_segctor_sem: segment constructor semaphore + * @ns_prev_seq: base sequence number used to decide if advance log cursor + * @ns_writer: log writer + * @ns_segctor_sem: semaphore protecting log write * @ns_dat: DAT file inode * @ns_cpfile: checkpoint file inode * @ns_sufile: segusage file inode - * @ns_gc_dat: shadow inode of the DAT file inode for GC + * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) + * @ns_cptree_lock: lock protecting @ns_cptree + * @ns_dirty_files: list of dirty files + * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks + * @ns_next_generation: next generation number for inodes + * @ns_next_gen_lock: lock protecting @ns_next_generation + * @ns_mount_opt: mount options + * @ns_resuid: uid for reserved blocks + * @ns_resgid: gid for reserved blocks + * @ns_interval: checkpoint creation interval + * @ns_watermark: watermark for the number of dirty buffers * @ns_blocksize_bits: bit length of block size + * @ns_blocksize: block size * @ns_nsegments: number of segments in filesystem * @ns_blocks_per_segment: number of blocks per segment * @ns_r_segments_percentage: reserved segments percentage @@ -87,28 +98,22 @@ enum { */ struct the_nilfs { unsigned long ns_flags; - atomic_t ns_count; struct block_device *ns_bdev; - struct backing_dev_info *ns_bdi; - struct nilfs_sb_info *ns_writer; struct rw_semaphore ns_sem; - struct mutex ns_writer_mutex; - atomic_t ns_writer_refcount; + struct mutex ns_snapshot_mount_mutex; /* * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. - * - protecting s_dirt in the super_block struct - * (see nilfs_write_super) and the following fields. */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; - time_t ns_sbwtime[2]; + time_t ns_sbwtime; + unsigned ns_sbwcount; unsigned ns_sbsize; unsigned ns_mount_state; - struct list_head ns_supers; /* * Following fields are dedicated to a writable FS-instance. @@ -136,8 +141,9 @@ struct the_nilfs { u64 ns_last_seq; __u64 ns_last_cno; u64 ns_prot_seq; - unsigned long ns_free_segments_count; + u64 ns_prev_seq; + struct nilfs_sc_info *ns_writer; struct rw_semaphore ns_segctor_sem; /* @@ -147,14 +153,33 @@ struct the_nilfs { struct inode *ns_dat; struct inode *ns_cpfile; struct inode *ns_sufile; - struct inode *ns_gc_dat; - /* GC inode list and hash table head */ + /* Checkpoint tree */ + struct rb_root ns_cptree; + spinlock_t ns_cptree_lock; + + /* Dirty inode list */ + struct list_head ns_dirty_files; + spinlock_t ns_inode_lock; + + /* GC inode list */ struct list_head ns_gc_inodes; - struct hlist_head *ns_gc_inodes_h; + + /* Inode allocator */ + u32 ns_next_generation; + spinlock_t ns_next_gen_lock; + + /* Mount options */ + unsigned long ns_mount_opt; + + uid_t ns_resuid; + gid_t ns_resgid; + unsigned long ns_interval; + unsigned long ns_watermark; /* Disk layout information (static) */ unsigned int ns_blocksize_bits; + unsigned int ns_blocksize; unsigned long ns_nsegments; unsigned long ns_blocks_per_segment; unsigned long ns_r_segments_percentage; @@ -165,9 +190,6 @@ struct the_nilfs { u32 ns_crc_seed; }; -#define NILFS_GCINODE_HASH_BITS 8 -#define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS) - #define THE_NILFS_FNS(bit, name) \ static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ { \ @@ -183,59 +205,95 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ } THE_NILFS_FNS(INIT, init) -THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) +THE_NILFS_FNS(GC_RUNNING, gc_running) +THE_NILFS_FNS(SB_DIRTY, sb_dirty) + +/* + * Mount option operations + */ +#define nilfs_clear_opt(nilfs, opt) \ + do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) +#define nilfs_set_opt(nilfs, opt) \ + do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0) +#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) +#define nilfs_write_opt(nilfs, mask, opt) \ + do { (nilfs)->ns_mount_opt = \ + (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ + NILFS_MOUNT_##opt); \ + } while (0) + +/** + * struct nilfs_root - nilfs root object + * @cno: checkpoint number + * @rb_node: red-black tree node + * @count: refcount of this structure + * @nilfs: nilfs object + * @ifile: inode file + * @inodes_count: number of inodes + * @blocks_count: number of blocks + */ +struct nilfs_root { + __u64 cno; + struct rb_node rb_node; + + atomic_t count; + struct the_nilfs *nilfs; + struct inode *ifile; + + atomic64_t inodes_count; + atomic64_t blocks_count; +}; + +/* Special checkpoint number */ +#define NILFS_CPTREE_CURRENT_CNO 0 /* Minimum interval of periodical update of superblocks (in seconds) */ #define NILFS_SB_FREQ 10 -#define NILFS_ALTSB_FREQ 60 /* spare superblock */ + +static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) +{ + u64 t = get_seconds(); + return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ; +} + +static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) +{ + int flip_bits = nilfs->ns_sbwcount & 0x0FL; + return (flip_bits != 0x08 && flip_bits != 0x0F); +} void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *alloc_nilfs(struct block_device *); -void put_nilfs(struct the_nilfs *); -int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); -int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); +struct the_nilfs *alloc_nilfs(struct block_device *bdev); +void destroy_nilfs(struct the_nilfs *nilfs); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); +int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); -int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); +struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, + __u64 cno); +void nilfs_put_root(struct nilfs_root *root); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); void nilfs_swap_super_block(struct the_nilfs *); -static inline void get_nilfs(struct the_nilfs *nilfs) +static inline void nilfs_get_root(struct nilfs_root *root) { - /* Caller must have at least one reference of the_nilfs. */ - atomic_inc(&nilfs->ns_count); + atomic_inc(&root->count); } -static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs) +static inline int nilfs_valid_fs(struct the_nilfs *nilfs) { - if (atomic_inc_and_test(&nilfs->ns_writer_refcount)) - mutex_lock(&nilfs->ns_writer_mutex); - return nilfs->ns_writer; -} + unsigned valid_fs; -static inline void nilfs_put_writer(struct the_nilfs *nilfs) -{ - if (atomic_add_negative(-1, &nilfs->ns_writer_refcount)) - mutex_unlock(&nilfs->ns_writer_mutex); -} - -static inline void -nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - mutex_lock(&nilfs->ns_writer_mutex); - nilfs->ns_writer = sbi; - mutex_unlock(&nilfs->ns_writer_mutex); -} - -static inline void -nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - mutex_lock(&nilfs->ns_writer_mutex); - if (sbi == nilfs->ns_writer) - nilfs->ns_writer = NULL; - mutex_unlock(&nilfs->ns_writer_mutex); + down_read(&nilfs->ns_sem); + valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); + up_read(&nilfs->ns_sem); + return valid_fs; } static inline void |
