diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-10 09:33:29 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-10 09:33:29 -0800 |
commit | 4515c3069da5bfb6f08dbfca499464b4cbdfcb50 (patch) | |
tree | 0f067045a0e31f126aebcf3087028639ceb237b4 /fs/ext4 | |
parent | a5eba3f66f812cbc076a1170b3f888ad63f850b2 (diff) | |
parent | fab3a549e204172236779f502eccb4f9bf0dc87d (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
ext4: Do not override ext2 or ext3 if built they are built as modules
jbd2: Export jbd2_log_start_commit to fix ext4 build
ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
ext4: Wait for proper transaction commit on fsync
ext4: fix incorrect block reservation on quota transfer.
ext4: quota macros cleanup
ext4: ext4_get_reserved_space() must return bytes instead of blocks
ext4: remove blocks from inode prealloc list on failure
ext4: wait for log to commit when umounting
ext4: Avoid data / filesystem corruption when write fails to copy data
ext4: Use ext4 file system driver for ext2/ext3 file system mounts
ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
ext4: remove unused parameter wbc from __ext4_journalled_writepage()
ext4: remove encountered_congestion trace
ext4: move_extent_per_page() cleanup
ext4: initialize moved_len before calling ext4_move_extents()
ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
ext4: use ext4_data_block_valid() in ext4_free_blocks()
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Kconfig | 10 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 46 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 3 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 23 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 82 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 44 | ||||
-rw-r--r-- | fs/ext4/extents.c | 44 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 54 | ||||
-rw-r--r-- | fs/ext4/inode.c | 193 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 29 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 101 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 27 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 282 | ||||
-rw-r--r-- | fs/ext4/namei.c | 38 | ||||
-rw-r--r-- | fs/ext4/resize.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 118 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 15 |
17 files changed, 613 insertions, 498 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9f2d45d75b1..9acf7e80813 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -26,6 +26,16 @@ config EXT4_FS If unsure, say N. +config EXT4_USE_FOR_EXT23 + bool "Use ext4 for ext2/ext3 file systems" + depends on EXT3_FS=n || EXT2_FS=n + default y + help + Allow the ext4 file system driver code to be used for ext2 or + ext3 file system mounts. This allows users to reduce their + compiled kernel size by using one file system driver for + ext2, ext3, and ext4 file systems. + config EXT4_FS_XATTR bool "Ext4 extended attributes" depends on EXT4_FS diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d0418980f8..22bc7435d91 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -499,44 +499,6 @@ error_return: } /** - * ext4_free_blocks() -- Free given blocks and update quota - * @handle: handle for this transaction - * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count - * @metadata: Are these metadata blocks - */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata) -{ - struct super_block *sb; - unsigned long dquot_freed_blocks; - - /* this isn't the right place to decide whether block is metadata - * inode.c/extents.c knows better, but for safety ... */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; - - /* We need to make sure we don't reuse - * block released untill the transaction commit. - * writeback mode have weak data consistency so - * don't force data as metadata when freeing block - * for writeback mode. - */ - if (metadata == 0 && !ext4_should_writeback_data(inode)) - metadata = 1; - - sb = inode->i_sb; - - ext4_mb_free_blocks(handle, inode, block, count, - metadata, &dquot_freed_blocks); - if (dquot_freed_blocks) - vfs_dq_free_block(inode, dquot_freed_blocks); - return; -} - -/** * ext4_has_free_blocks() * @sbi: in-core super block structure. * @nblocks: number of needed blocks @@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; + if (!ext4_bg_has_super(sb, group)) + return 0; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + else + return EXT4_SB(sb)->s_gdb_count; } /** diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 50784ef0756..4df8621ec31 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb) if (ext4_bg_has_super(sb, i) && ((i < 5) || ((i % flex_size) == 0))) add_system_zone(sbi, ext4_group_first_block_no(sb, i), - sbi->s_gdb_count + 1); + ext4_bg_num_gdb(sb, i) + 1); gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); if (ret) @@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, struct rb_node *n = sbi->system_blks.rb_node; if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count < start_blk) || (start_blk + count > ext4_blocks_count(sbi->s_es))) return 0; while (n) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8825515eedd..ab31e65d46d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -376,6 +376,12 @@ struct ext4_new_group_data { EXT4_GET_BLOCKS_DIO_CREATE_EXT) /* + * Flags used by ext4_free_blocks + */ +#define EXT4_FREE_BLOCKS_METADATA 0x0001 +#define EXT4_FREE_BLOCKS_FORGET 0x0002 + +/* * ioctl commands */ #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS @@ -703,6 +709,13 @@ struct ext4_inode_info { struct list_head i_aio_dio_complete_list; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; + + /* + * Transactions that contain inode's metadata needed to complete + * fsync and fdatasync, respectively. + */ + tid_t i_sync_tid; + tid_t i_datasync_tid; }; /* @@ -750,6 +763,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ +#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt @@ -1324,8 +1338,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp); extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); -extern void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, int metadata); extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); @@ -1384,16 +1396,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int); extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); -extern void ext4_mb_free_blocks(handle_t *, struct inode *, - ext4_fsblk_t, unsigned long, int, unsigned long *); +extern void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); extern void ext4_mb_put_buddy_cache_lock(struct super_block *, ext4_group_t, int); /* inode.c */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int, int *); struct buffer_head *ext4_bread(handle_t *, struct inode *, diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6a9409920de..b57e5c711b6 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -4,6 +4,8 @@ #include "ext4_jbd2.h" +#include <trace/events/ext4.h> + int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { @@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, return err; } -int __ext4_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh) +/* + * The ext4 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. + * + * "bh" may be NULL: a metadata block may have been freed from memory + * but there may still be a record of it in the journal, and that record + * still needs to be revoked. + * + * If the handle isn't valid we're not journaling, but we still need to + * call into ext4_journal_revoke() to put the buffer head. + */ +int __ext4_forget(const char *where, handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + ext4_fsblk_t blocknr) { - int err = 0; + int err; - if (ext4_handle_valid(handle)) { - err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, - handle, err); - } - else + might_sleep(); + + trace_ext4_forget(inode, is_metadata, blocknr); + BUFFER_TRACE(bh, "enter"); + + jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " + "data mode %x\n", + bh, is_metadata, inode->i_mode, + test_opt(inode->i_sb, DATA_FLAGS)); + + /* In the no journal case, we can just do a bforget and return */ + if (!ext4_handle_valid(handle)) { bforget(bh); - return err; -} + return 0; + } -int __ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh) -{ - int err = 0; + /* Never use the revoke function if we are doing full data + * journaling: there is no need to, and a V1 superblock won't + * support it. Otherwise, only skip the revoke on un-journaled + * data blocks. */ - if (ext4_handle_valid(handle)) { - err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, - handle, err); + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (!is_metadata && !ext4_should_journal_data(inode))) { + if (bh) { + BUFFER_TRACE(bh, "call jbd2_journal_forget"); + err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + return err; + } + return 0; } - else - bforget(bh); + + /* + * data!=journal && (is_metadata || should_journal_data(inode)) + */ + BUFFER_TRACE(bh, "call jbd2_journal_revoke"); + err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) { + ext4_journal_abort_handle(where, __func__, bh, handle, err); + ext4_abort(inode->i_sb, __func__, + "error %d when attempting revoke", err); + } + BUFFER_TRACE(bh, "exit"); return err; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a2865980342..05eca817d70 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -49,7 +49,7 @@ #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* * Define the number of metadata blocks we need to account to modify data. @@ -57,7 +57,7 @@ * This include super block, inode block, quota blocks and xattr blocks */ #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be @@ -92,6 +92,7 @@ * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) + #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) #else @@ -99,6 +100,9 @@ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif +#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) int ext4_mark_iloc_dirty(handle_t *handle, @@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); /* - * Wrapper functions with which ext4 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext4 can control - * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't - * been done yet. + * Wrapper functions with which ext4 calls into JBD. */ - void ext4_journal_abort_handle(const char *caller, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err); @@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, int __ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh); -/* When called with an invalid handle, this will still do a put on the BH */ -int __ext4_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh); - -/* When called with an invalid handle, this will still do a put on the BH */ -int __ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh); +int __ext4_forget(const char *where, handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + ext4_fsblk_t blocknr); int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh); @@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, __ext4_journal_get_undo_access(__func__, (handle), (bh)) #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, (handle), (bh)) -#define ext4_journal_revoke(handle, blocknr, bh) \ - __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) +#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ + __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ + (block_nr)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, (handle), (bh)) -#define ext4_journal_forget(handle, bh) \ - __ext4_journal_forget(__func__, (handle), (bh)) #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) @@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) return 0; } +static inline void ext4_update_inode_fsync_trans(handle_t *handle, + struct inode *inode, + int datasync) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (ext4_handle_valid(handle)) { + ei->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + ei->i_datasync_tid = handle->h_transaction->t_tid; + } +} + /* super.c */ int ext4_force_commit(struct super_block *sb); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 715264b4bae..3a7928f825e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1007,7 +1007,8 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, ablocks[i], 1, 1); + ext4_free_blocks(handle, inode, 0, ablocks[i], 1, + EXT4_FREE_BLOCKS_METADATA); } } kfree(ablocks); @@ -1761,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, while (block < last && block != EXT_MAX_BLOCK) { num = last - block; /* find extent for this block */ + down_read(&EXT4_I(inode)->i_data_sem); path = ext4_ext_find_extent(inode, block, path); + up_read(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -1957,7 +1960,6 @@ errout: static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { - struct buffer_head *bh; int err; ext4_fsblk_t leaf; @@ -1973,9 +1975,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); - bh = sb_find_get_block(inode->i_sb, leaf); - ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1, 1); + ext4_free_blocks(handle, inode, 0, leaf, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return err; } @@ -2042,12 +2043,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_lblk_t to) { - struct buffer_head *bh; unsigned short ee_len = ext4_ext_get_actual_len(ex); - int i, metadata = 0; + int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; + flags |= EXT4_FREE_BLOCKS_METADATA; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2072,11 +2072,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext_pblock(ex) + ee_len - num; ext_debug("free last %u blocks starting %llu\n", num, start); - for (i = 0; i < num; i++) { - bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, 0, inode, bh, start + i); - } - ext4_free_blocks(handle, inode, start, num, metadata); + ext4_free_blocks(handle, inode, 0, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", @@ -2167,7 +2163,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, correct_index = 1; credits += (ext_depth(inode)) + 1; } - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); err = ext4_ext_truncate_extend_restart(handle, inode, credits); if (err) @@ -3064,6 +3060,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { ret = ext4_convert_unwritten_extents_dio(handle, inode, path); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); goto out2; } /* buffered IO case */ @@ -3091,6 +3089,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, max_blocks); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out: if (ret <= 0) { err = ret; @@ -3319,8 +3319,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), + ext4_ext_get_actual_len(&newex), 0); goto out2; } @@ -3329,10 +3329,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ext4_ext_get_actual_len(&newex); set_buffer_new(bh_result); - /* Cache only when it is _not_ an uninitialized extent */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + /* + * Cache the extent and update transaction to commit on fdatasync only + * when it is _not_ an uninitialized extent. + */ + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); + ext4_update_inode_fsync_trans(handle, inode, 1); + } else + ext4_update_inode_fsync_trans(handle, inode, 0); out: if (allocated > max_blocks) allocated = max_blocks; @@ -3720,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_read(&EXT4_I(inode)->i_data_sem); } return error; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2b1531266ee..0b22497d92e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -51,25 +51,30 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; + struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - int err, ret = 0; + int ret; + tid_t commit_tid; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, dentry, datasync); + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; + ret = flush_aio_dio_completed_IO(inode); if (ret < 0) - goto out; + return ret; + + if (!journal) + return simple_fsync(file, dentry, datasync); + /* - * data=writeback: + * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. - * sync_inode() will sync the metadata - * - * data=ordered: - * The caller's filemap_fdatawrite() will write the data and - * sync_inode() will write the inode if it is dirty. Then the caller's - * filemap_fdatawait() will wait on the pages. + * Metadata is in the journal, we wait for proper transaction to + * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). @@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - goto out; - } + if (ext4_should_journal_data(inode)) + return ext4_force_commit(inode->i_sb); - if (!journal) - ret = sync_mapping_buffers(inode->i_mapping); - - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; - - /* - * The VFS has written the file data. If the inode is unaltered - * then we need not start a commit. - */ - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - err = sync_inode(inode, &wbc); - if (ret == 0) - ret = err; - } -out: - if (journal && (journal->j_flags & JBD2_BARRIER)) + commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; + if (jbd2_log_start_commit(journal, commit_tid)) + jbd2_log_wait_commit(journal, commit_tid); + else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4e8e2f15b8b..5352db1a308 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -71,58 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) } /* - * The ext4 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. - * - * "bh" may be NULL: a metadata block may have been freed from memory - * but there may still be a record of it in the journal, and that record - * still needs to be revoked. - * - * If the handle isn't valid we're not journaling, but we still need to - * call into ext4_journal_revoke() to put the buffer head. - */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr) -{ - int err; - - might_sleep(); - - BUFFER_TRACE(bh, "enter"); - - jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %x\n", - bh, is_metadata, inode->i_mode, - test_opt(inode->i_sb, DATA_FLAGS)); - - /* Never use the revoke function if we are doing full data - * journaling: there is no need to, and a V1 superblock won't - * support it. Otherwise, only skip the revoke on un-journaled - * data blocks. */ - - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext4_should_journal_data(inode))) { - if (bh) { - BUFFER_TRACE(bh, "call jbd2_journal_forget"); - return ext4_journal_forget(handle, bh); - } - return 0; - } - - /* - * data!=journal && (is_metadata || should_journal_data(inode)) - */ - BUFFER_TRACE(bh, "call ext4_journal_revoke"); - err = ext4_journal_revoke(handle, blocknr, bh); - if (err) - ext4_abort(inode->i_sb, __func__, - "error %d when attempting revoke", err); - BUFFER_TRACE(bh, "exit"); - return err; -} - -/* * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. */ @@ -721,7 +669,7 @@ allocated: return ret; failed_out: for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); return ret; } @@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, return err; failed: /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, branch[i].bh); + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); } - for (i = 0; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); - ext4_free_blocks(handle, inode, new_blocks[i], num, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); return err; } @@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, where[i].bh); - ext4_free_blocks(handle, inode, - le32_to_cpu(where[i-1].key), 1, 0); + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); } - ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), + blks, 0); return err; } @@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, if (!err) err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - else + if (err) goto cleanup; set_buffer_new(bh_result); + + ext4_update_inode_fsync_trans(handle, inode, 1); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) @@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode) EXT4_I(inode)->i_reserved_meta_blocks; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - return total; + return (total << inode->i_blkbits); } /* * Calculate the number of metadata blocks need to reserve @@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1599,7 +1569,7 @@ retry: ext4_journal_stop(handle); if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might * still be on the orphan list; we need to @@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file, if (!ret) ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) } static int __ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc, unsigned int len) { struct address_space *mapping = page->mapping; @@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page, * doesn't seem much point in redirtying the page here. */ ClearPageChecked(page); - return __ext4_journalled_writepage(page, wbc, len); + return __ext4_journalled_writepage(page, len); } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) @@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) * number of contiguous block. So we will limit * number of contiguous block to a sane value */ - if (!(inode->i_flags & EXT4_EXTENTS_FL) && + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && (max_blocks > EXT4_MAX_TRANS_DATA)) max_blocks = EXT4_MAX_TRANS_DATA; @@ -3091,7 +3060,7 @@ retry: * i_size_read because we hold i_mutex. */ if (pos + len > inode->i_size) - ext4_truncate(inode); + ext4_truncate_failed_write(inode); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; + int flags = EXT4_FREE_BLOCKS_FORGET; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; + if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); @@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, } } - /* - * Any buffers which are on the journal will be in memory. We - * find them on the hash table so jbd2_journal_revoke() will - * run jbd2_journal_forget() on them. We've already detached - * each block from the file, so bforget() in - * jbd2_journal_forget() should be safe. - * - * AKPM: turn on bforget in jbd2_journa |