diff options
Diffstat (limited to 'fs/nilfs2/mdt.c')
| -rw-r--r-- | fs/nilfs2/mdt.c | 373 |
1 files changed, 205 insertions, 168 deletions
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 47dd815433f..c4dcd1db57e 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -26,7 +26,9 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/swap.h> +#include <linux/slab.h> #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" @@ -34,7 +36,6 @@ #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) -#define INIT_UNUSED_INODE_FIELDS static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, @@ -57,15 +58,15 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_buffer_uptodate(bh); - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); return 0; } @@ -76,22 +77,11 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; - if (!sb) { - writer = nilfs_get_writer(nilfs); - if (!writer) { - err = -EROFS; - goto out; - } - sb = writer->s_super; - } - nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; @@ -100,17 +90,14 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, goto failed_unlock; err = -EEXIST; - if (buffer_uptodate(bh) || buffer_mapped(bh)) + if (buffer_uptodate(bh)) goto failed_bh; -#if 0 - /* The uptodate flag is not protected by the page lock, but - the mapped flag is. Thus, we don't have to wait the buffer. */ + wait_on_buffer(bh); if (buffer_uptodate(bh)) goto failed_bh; -#endif - bh->b_bdev = nilfs->ns_bdev; + bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -127,9 +114,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - if (writer) - nilfs_put_writer(nilfs); - out: + return err; } @@ -138,7 +123,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, int mode, struct buffer_head **out_bh) { struct buffer_head *bh; - unsigned long blknum = 0; + __u64 blknum = 0; int ret = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); @@ -161,17 +146,13 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, unlock_buffer(bh); goto out; } - if (!buffer_mapped(bh)) { /* unused buffer */ - ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, - &blknum); - if (unlikely(ret)) { - unlock_buffer(bh); - goto failed_bh; - } - bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; - bh->b_blocknr = blknum; - set_buffer_mapped(bh); + + ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); + if (unlikely(ret)) { + unlock_buffer(bh); + goto failed_bh; } + map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -190,7 +171,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, - struct buffer_head **out_bh) + int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; @@ -204,16 +185,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (unlikely(err)) goto failed; - blkoff = block + 1; - for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); - if (likely(!err || err == -EEXIST)) - brelse(bh); - else if (err != -EBUSY) - break; /* abort readahead if bmap lookup failed */ - - if (!buffer_locked(first_bh)) - goto out_no_wait; + if (readahead) { + blkoff = block + 1; + for (i = 0; i < nr_ra_blocks; i++, blkoff++) { + err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + if (likely(!err || err == -EEXIST)) + brelse(bh); + else if (err != -EBUSY) + break; + /* abort readahead if bmap lookup failed */ + if (!buffer_locked(first_bh)) + goto out_no_wait; + } } wait_on_buffer(first_bh); @@ -254,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * * %-ENOENT - the specified block does not exist (hole block) * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) - * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, @@ -267,7 +248,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: - ret = nilfs_mdt_read_block(inode, blkoff, out_bh); + ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; @@ -290,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -299,7 +278,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) int err; err = nilfs_bmap_delete(ii->i_bmap, block); - if (likely(!err)) { + if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } @@ -367,18 +346,16 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) { struct buffer_head *bh; int err; - err = nilfs_mdt_read_block(inode, block, &bh); + err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); brelse(bh); return 0; @@ -398,131 +375,67 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = container_of(page->mapping, - struct inode, i_data); - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *writer = NULL; + struct inode *inode = page->mapping->host; + struct super_block *sb; int err = 0; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); - if (page->mapping->assoc_mapping) - return 0; /* Do not request flush for shadow page cache */ - if (!sb) { - writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); - if (!writer) - return -EROFS; - sb = writer->s_super; - } + if (!inode) + return 0; + + sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); - if (writer) - nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); return err; } -static struct address_space_operations def_mdt_aops = { +static const struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, }; -static struct inode_operations def_mdt_iops; -static struct file_operations def_mdt_fops; +static const struct inode_operations def_mdt_iops; +static const struct file_operations def_mdt_fops; -/* - * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, - * ifile, or gcinodes. This allows the B-tree code and segment constructor - * to treat them like regular files, and this helps to simplify the - * implementation. - * On the other hand, some of the pseudo inodes have an irregular point: - * They don't have valid inode->i_sb pointer because their lifetimes are - * longer than those of the super block structs; they may continue for - * several consecutive mounts/umounts. This would need discussions. - */ -struct inode * -nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) -{ - struct inode *inode = nilfs_alloc_inode(sb); - if (!inode) - return NULL; - else { - struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); - - if (!mi) { - nilfs_destroy_inode(inode); - return NULL; - } - mi->mi_nilfs = nilfs; - init_rwsem(&mi->mi_sem); - - inode->i_sb = sb; /* sb may be NULL for some meta data files */ - inode->i_blkbits = nilfs->ns_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_nlink = 1; - inode->i_ino = ino; - inode->i_mode = S_IFREG; - inode->i_private = mi; - -#ifdef INIT_UNUSED_INODE_FIELDS - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; -#ifdef CONFIG_SECURITY - inode->i_security = NULL; -#endif - inode->dirtied_when = 0; - - INIT_LIST_HEAD(&inode->i_list); - INIT_LIST_HEAD(&inode->i_sb_list); - inode->i_state = 0; -#endif - - spin_lock_init(&inode->i_lock); - mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); - - mapping->host = NULL; /* instead of inode */ - mapping->flags = 0; - mapping_set_gfp_mask(mapping, gfp_mask); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = nilfs->ns_bdi; - - inode->i_mapping = mapping; - } +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) +{ + struct nilfs_mdt_info *mi; - return inode; -} + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); + if (!mi) + return -ENOMEM; -struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) -{ - struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); + init_rwsem(&mi->mi_sem); + inode->i_private = mi; - if (!inode) - return NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, gfp_mask); + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; - return inode; + + return 0; } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, @@ -535,29 +448,153 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } -void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) +/** + * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file + * @inode: inode of the metadata file + * @shadow: shadow mapping + */ +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct backing_dev_info *bdi = inode->i_sb->s_bdi; + + INIT_LIST_HEAD(&shadow->frozen_buffers); + address_space_init_once(&shadow->frozen_data); + nilfs_mapping_init(&shadow->frozen_data, inode, bdi); + address_space_init_once(&shadow->frozen_btnodes); + nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); + mi->mi_shadow = shadow; + return 0; +} + +/** + * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map + * @inode: inode of the metadata file + */ +int nilfs_mdt_save_to_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + int ret; + + ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + if (ret) + goto out; + + ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + &ii->i_btnode_cache); + if (ret) + goto out; + + nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); + out: + return ret; +} + +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) +{ + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen; + struct page *page; + int blkbits = inode->i_blkbits; + + page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + if (!page) + return -ENOMEM; + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << blkbits, 0); + + bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ + } + + unlock_page(page); + page_cache_release(page); + return 0; +} + +struct buffer_head * +nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { - shadow->i_mapping->assoc_mapping = orig->i_mapping; - NILFS_I(shadow)->i_btnode_cache.assoc_mapping = - &NILFS_I(orig)->i_btnode_cache; + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen = NULL; + struct page *page; + int n; + + page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; + bh_frozen = nilfs_page_get_nth_block(page, n); + } + unlock_page(page); + page_cache_release(page); + } + return bh_frozen; } -void nilfs_mdt_clear(struct inode *inode) +static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) { + struct list_head *head = &shadow->frozen_buffers; + struct buffer_head *bh; + + while (!list_empty(head)) { + bh = list_first_entry(head, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); /* drop ref-count to make it releasable */ + } +} + +/** + * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state + * @inode: inode of the metadata file + */ +void nilfs_mdt_restore_from_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + + down_write(&mi->mi_sem); + + if (mi->mi_palloc_cache) + nilfs_palloc_clear_cache(inode); - invalidate_mapping_pages(inode->i_mapping, 0, -1); - truncate_inode_pages(inode->i_mapping, 0); + nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); + nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + + up_write(&mi->mi_sem); } -void nilfs_mdt_destroy(struct inode *inode) +/** + * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches + * @inode: inode of the metadata file + */ +void nilfs_mdt_clear_shadow_map(struct inode *inode) { - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; - kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ - kfree(mdi); - nilfs_destroy_inode(inode); + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); + truncate_inode_pages(&shadow->frozen_data, 0); + truncate_inode_pages(&shadow->frozen_btnodes, 0); + up_write(&mi->mi_sem); } |
