aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-27 10:26:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-27 10:26:37 -0700
commite4ce30f3779c2ddaa7dfaa4042209e5dbacbada5 (patch)
treecc64c1dcd16b5dbf71ebc8338b339e6fb04abaee
parentb899ebeb05da4287ce845976727e3e83dadd25d5 (diff)
parent14ece1028b3ed53ffec1b1213ffc6acaf79ad77c (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits) ext4: Make fsync sync new parent directories in no-journal mode ext4: Drop whitespace at end of lines ext4: Fix compat EXT4_IOC_ADD_GROUP ext4: Conditionally define compat ioctl numbers tracing: Convert more ext4 events to DEFINE_EVENT ext4: Add new tracepoints to track mballoc's buddy bitmap loads ext4: Add a missing trace hook ext4: restart ext4_ext_remove_space() after transaction restart ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted ext4: Avoid crashing on NULL ptr dereference on a filesystem error ext4: Use bitops to read/modify i_flags in struct ext4_inode_info ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE() ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks() ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks() ext4: Use our own write_cache_pages() ext4: Show journal_checksum option ext4: Fix for ext4_mb_collect_stats() ext4: check for a good block group before loading buddy pages ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate ext4: Remove extraneous newlines in ext4_msg() calls ... Fixed up trivial conflict in fs/ext4/fsync.c
-rw-r--r--fs/ext4/balloc.c5
-rw-r--r--fs/ext4/block_validity.c4
-rw-r--r--fs/ext4/dir.c26
-rw-r--r--fs/ext4/ext4.h167
-rw-r--r--fs/ext4/ext4_jbd2.h8
-rw-r--r--fs/ext4/extents.c417
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsync.c35
-rw-r--r--fs/ext4/ialloc.c89
-rw-r--r--fs/ext4/inode.c723
-rw-r--r--fs/ext4/ioctl.c27
-rw-r--r--fs/ext4/mballoc.c120
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c13
-rw-r--r--fs/ext4/namei.c61
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c80
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/ext4/xattr.c39
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--fs/quota/dquot.c11
-rw-r--r--include/linux/quotaops.h37
-rw-r--r--include/trace/events/ext4.h94
23 files changed, 1136 insertions, 834 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2f37a5516c..95b7594c76f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ret = ext4_mb_new_blocks(handle, &ar, errp);
if (count)
*count = ar.len;
-
/*
- * Account for the allocated meta blocks
+ * Account for the allocated meta blocks. We will never
+ * fail EDQUOT for metdata, but we do account for it.
*/
if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ dquot_alloc_block_nofail(inode, ar.len);
}
return ret;
}
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 538c4865508..5b6973fbf1b 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi,
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
- if (start_blk + count > (entry->start_blk +
+ if (start_blk + count > (entry->start_blk +
entry->count))
- entry->count = (start_blk + count -
+ entry->count = (start_blk + count -
entry->start_blk);
new_node = *n;
new_entry = rb_entry(new_node, struct ext4_system_zone,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 86cb6d86a04..ea5e6cb7e2a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
error_msg = "inode out of bounds";
if (error_msg != NULL)
- __ext4_error(dir->i_sb, function,
- "bad entry in directory #%lu: %s - block=%llu"
+ ext4_error_inode(function, dir,
+ "bad entry in directory: %s - block=%llu"
"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
- dir->i_ino, error_msg,
- (unsigned long long) bh->b_blocknr,
+ error_msg, (unsigned long long) bh->b_blocknr,
(unsigned) (offset%bh->b_size), offset,
le32_to_cpu(de->inode),
rlen, de->name_len);
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp,
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
- ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
+ ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
((inode->i_size >> sb->s_blocksize_bits) == 1))) {
err = ext4_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp,
* We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk.
*/
- EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
+ ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
}
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) {
- ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
- struct buffer_head map_bh;
+ struct ext4_map_blocks map;
struct buffer_head *bh = NULL;
- map_bh.b_state = 0;
- err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
+ map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+ map.m_len = 1;
+ err = ext4_map_blocks(NULL, inode, &map, 0);
if (err > 0) {
- pgoff_t index = map_bh.b_blocknr >>
+ pgoff_t index = map.m_pblk >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&filp->f_ra, index))
page_cache_sync_readahead(
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp,
&filp->f_ra, filp,
index, 1);
filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
- bh = ext4_bread(NULL, inode, blk, 0, &err);
+ bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
}
/*
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp,
*/
if (!bh) {
if (!dir_has_error) {
- ext4_error(sb, "directory #%lu "
+ EXT4_ERROR_INODE(inode, "directory "
"contains a hole at offset %Lu",
- inode->i_ino,
(unsigned long long) filp->f_pos);
dir_has_error = 1;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf938cf7c5f..60bd31026e7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -29,6 +29,9 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
+#ifdef __KERNEL__
+#include <linux/compat.h>
+#endif
/*
* The fourth extended filesystem constants/structures
@@ -54,10 +57,10 @@
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
- ext4_error_inode(__func__, (inode), (fmt), ## a);
+ ext4_error_inode(__func__, (inode), (fmt), ## a)
#define EXT4_ERROR_FILE(file, fmt, a...) \
- ext4_error_file(__func__, (file), (fmt), ## a);
+ ext4_error_file(__func__, (file), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t;
typedef unsigned int ext4_group_t;
/*
- * Flags used in mballoc's allocation_context flags field.
+ * Flags used in mballoc's allocation_context flags field.
*
* Also used to show what's going on for debugging purposes when the
* flag field is exported via the traceport interface
@@ -126,6 +129,29 @@ struct ext4_allocation_request {
};
/*
+ * Logical to physical block mapping, used by ext4_map_blocks()
+ *
+ * This structure is used to pass requests into ext4_map_blocks() as
+ * well as to store the information returned by ext4_map_blocks(). It
+ * takes less room on the stack than a struct buffer_head.
+ */
+#define EXT4_MAP_NEW (1 << BH_New)
+#define EXT4_MAP_MAPPED (1 << BH_Mapped)
+#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
+#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
+#define EXT4_MAP_UNINIT (1 << BH_Uninit)
+#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
+ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+ EXT4_MAP_UNINIT)
+
+struct ext4_map_blocks {
+ ext4_fsblk_t m_pblk;
+ ext4_lblk_t m_lblk;
+ unsigned int m_len;
+ unsigned int m_flags;
+};
+
+/*
* For delayed allocation tracking
*/
struct mpage_da_data {
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
return flags & EXT4_OTHER_FLMASK;
}
+/*
+ * Inode flags used for atomic set/get
+ */
+enum {
+ EXT4_INODE_SECRM = 0, /* Secure deletion */
+ EXT4_INODE_UNRM = 1, /* Undelete */
+ EXT4_INODE_COMPR = 2, /* Compress file */
+ EXT4_INODE_SYNC = 3, /* Synchronous updates */
+ EXT4_INODE_IMMUTABLE = 4, /* Immutable file */
+ EXT4_INODE_APPEND = 5, /* writes to file may only append */
+ EXT4_INODE_NODUMP = 6, /* do not dump file */
+ EXT4_INODE_NOATIME = 7, /* do not update atime */
+/* Reserved for compression usage... */
+ EXT4_INODE_DIRTY = 8,
+ EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
+ EXT4_INODE_NOCOMPR = 10, /* Don't compress */
+ EXT4_INODE_ECOMPR = 11, /* Compression error */
+/* End compression flags --- maybe not all used */
+ EXT4_INODE_INDEX = 12, /* hash-indexed directory */
+ EXT4_INODE_IMAGIC = 13, /* AFS directory */
+ EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */
+ EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */
+ EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */
+ EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
+ EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
+ EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
+ EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
+ EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
+ EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
+};
+
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
+ printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
+ EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
+
+/*
+ * Since it's pretty easy to mix up bit numbers and hex values, and we
+ * can't do a compile-time test for ENUM values, we use a run-time
+ * test to make sure that EXT4_XXX_FL is consistent with respect to
+ * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop
+ * out so it won't cost any extra space in the compiled kernel image.
+ * But it's important that these values are the same, since we are
+ * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
+ * must be consistent with the values of FS_XXX_FL defined in
+ * include/linux/fs.h and the on-disk values found in ext2, ext3, and
+ * ext4 filesystems, and of course the values defined in e2fsprogs.
+ *
+ * It's not paranoia if the Murphy's Law really *is* out to get you. :-)
+ */
+static inline void ext4_check_flag_values(void)
+{
+ CHECK_FLAG_VALUE(SECRM);
+ CHECK_FLAG_VALUE(UNRM);
+ CHECK_FLAG_VALUE(COMPR);
+ CHECK_FLAG_VALUE(SYNC);
+ CHECK_FLAG_VALUE(IMMUTABLE);
+ CHECK_FLAG_VALUE(APPEND);
+ CHECK_FLAG_VALUE(NODUMP);
+ CHECK_FLAG_VALUE(NOATIME);
+ CHECK_FLAG_VALUE(DIRTY);
+ CHECK_FLAG_VALUE(COMPRBLK);
+ CHECK_FLAG_VALUE(NOCOMPR);
+ CHECK_FLAG_VALUE(ECOMPR);
+ CHECK_FLAG_VALUE(INDEX);
+ CHECK_FLAG_VALUE(IMAGIC);
+ CHECK_FLAG_VALUE(JOURNAL_DATA);
+ CHECK_FLAG_VALUE(NOTAIL);
+ CHECK_FLAG_VALUE(DIRSYNC);
+ CHECK_FLAG_VALUE(TOPDIR);
+ CHECK_FLAG_VALUE(HUGE_FILE);
+ CHECK_FLAG_VALUE(EXTENTS);
+ CHECK_FLAG_VALUE(EA_INODE);
+ CHECK_FLAG_VALUE(EOFBLOCKS);
+ CHECK_FLAG_VALUE(RESERVED);
+}
+
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
__u32 group; /* Group number for this data */
@@ -332,6 +435,18 @@ struct ext4_new_group_input {
__u16 unused;
};
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+struct compat_ext4_new_group_input {
+ u32 group;
+ compat_u64 block_bitmap;
+ compat_u64 inode_bitmap;
+ compat_u64 inode_table;
+ u32 blocks_count;
+ u16 reserved_blocks;
+ u16 unused;
+};
+#endif
+
/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
struct ext4_new_group_data {
__u32 group;
@@ -355,7 +470,7 @@ struct ext4_new_group_data {
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
EXT4_GET_BLOCKS_CREATE)
/* Caller is from the delayed allocation writeout path,
- so set the magic i_delalloc_reserve_flag after taking the
+ so set the magic i_delalloc_reserve_flag after taking the
inode allocation semaphore for */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
/* caller is from the direct IO path, request to creation of an
@@ -398,6 +513,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
*/
@@ -408,11 +524,13 @@ struct ext4_new_group_data {
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
+#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
+#endif
/*
@@ -616,9 +734,8 @@ struct ext4_ext_cache {
*/
struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
- __u32 i_flags;
- ext4_fsblk_t i_file_acl;
__u32 i_dtime;
+ ext4_fsblk_t i_file_acl;
/*
* i_block_group is the number of the block group which contains
@@ -629,6 +746,7 @@ struct ext4_inode_info {
*/
ext4_group_t i_block_group;
unsigned long i_state_flags; /* Dynamic state flags */
+ unsigned long i_flags;
ext4_lblk_t i_dir_start_lookup;
#ifdef CONFIG_EXT4_FS_XATTR
@@ -1062,22 +1180,25 @@ enum {
EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
+ EXT4_STATE_NEWENTRY, /* File just added to dir */
};
-static inline int ext4_test_inode_state(struct inode *inode, int bit)
-{
- return test_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
-
-static inline void ext4_set_inode_state(struct inode *inode, int bit)
-{
- set_bit(bit, &EXT4_I(inode)->i_state_flags);
+#define EXT4_INODE_BIT_FNS(name, field) \
+static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
+{ \
+ return test_bit(bit, &EXT4_I(inode)->i_##field); \
+} \
+static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
+{ \
+ set_bit(bit, &EXT4_I(inode)->i_##field); \
+} \
+static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
+{ \
+ clear_bit(bit, &EXT4_I(inode)->i_##field); \
}
-static inline void ext4_clear_inode_state(struct inode *inode, int bit)
-{
- clear_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
+EXT4_INODE_BIT_FNS(flag, flags)
+EXT4_INODE_BIT_FNS(state, state_flags)
#else
/* Assume that user mode programs are passing in an ext4fs superblock, not
* a kernel struct super_block. This will allow us to call the feature-test
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 {
#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
EXT4_FEATURE_COMPAT_DIR_INDEX) && \
- (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
+ ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
@@ -1678,6 +1799,7 @@ struct ext4_group_info {
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
+ ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
void *bb_bitmap;
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
int chunk);
-extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, unsigned int max_blocks,
- struct buffer_head *bh_result, int flags);
+extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, int flags);
extern void ext4_ext_truncate(struct inode *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
loff_t len);
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
+extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, int flags);
extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
sector_t block, unsigned int max_blocks,
struct buffer_head *bh, int flags);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b79ad512646..dade0c02479 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode)
return 1;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
return 1;
- if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 1;
return 0;
}
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode)
return 0;
if (!S_ISREG(inode->i_mode))
return 0;
- if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return 1;
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode)
return 0;
if (EXT4_JOURNAL(inode) == NULL)
return 1;
- if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return 1;
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
return 0;
if (!S_ISREG(inode->i_mode))
return 0;
- if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return 0;
if (ext4_should_journal_data(inode))
return 0;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 236b834b4ca..377309c1af6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
if (err <= 0)
return err;
err = ext4_truncate_restart_trans(handle, inode, needed);
- /*
- * We have dropped i_data_sem so someone might have cached again
- * an extent we are going to truncate.
- */
- ext4_ext_invalidate_cache(inode);
+ if (err == 0)
+ err = -EAGAIN;
return err;
}
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
/*
* If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
- * block groups per flexgroup, reserve the first block
- * group for directories and special files. Regular
+ * block groups per flexgroup, reserve the first block
+ * group for directories and special files. Regular
* files will start at the second block group. This
- * tends to speed up directory access and improves
+ * tends to speed up directory access and improves
* fsck times.
*/
block_group &= ~(flex_size-1);
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0;
corrupted:
- __ext4_error(inode->i_sb, function,
- "bad header/extent in inode #%lu: %s - magic %x, "
+ ext4_error_inode(function, inode,
+ "bad header/extent: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
- inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
+ error_msg, le16_to_cpu(eh->eh_magic),
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
max, le16_to_cpu(eh->eh_depth), depth);
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
merge_done = 1;
WARN_ON(eh->eh_entries == 0);
if (!eh->eh_entries)
- ext4_error(inode->i_sb,
- "inode#%lu, eh->eh_entries = 0!",
- inode->i_ino);
+ EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
}
return merge_done;
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_ext_cache *cex;
int ret = EXT4_EXT_CACHE_NO;
- /*
+ /*
* We borrow i_block_reservation_lock to protect i_cached_extent
*/
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
int depth = ext_depth(inode);
struct ext4_ext_path *path;
handle_t *handle;
- int i = 0, err = 0;
+ int i, err;
ext_debug("truncate since %u\n", start);
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
if (IS_ERR(handle))
return PTR_ERR(handle);
+again:
ext4_ext_invalidate_cache(inode);
/*
* We start scanning from right side, freeing all the blocks
* after i_size and walking into the tree depth-wise.
*/
+ depth = ext_depth(inode);
path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
if (path == NULL) {
ext4_journal_stop(handle);
return -ENOMEM;
}
+ path[0].p_depth = depth;
path[0].p_hdr = ext_inode_hdr(inode);
if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
err = -EIO;
goto out;
}
- path[0].p_depth = depth;
+ i = err = 0;
while (i >= 0 && err == 0) {
if (i == depth) {
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
out:
ext4_ext_drop_refs(path);
kfree(path);
+ if (err == -EAGAIN)
+ goto again;
ext4_journal_stop(handle);
return err;
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error)
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
- int ret = -EIO;
+ int ret;
struct bio *bio;
int blkbits, blocksize;
sector_t ee_pblock;
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
+ if (!bio)
+ return -ENOMEM;
+
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
submit_bio(WRITE, bio);
wait_for_completion(&event);
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
- ret = 0;
- else {
- ret = -EIO;
- break;
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ bio_put(bio);
+ return -EIO;
}
bio_put(bio);
ee_len -= done;
ee_pblock += done << (blkbits - 9);
}
- return ret;
+ return 0;
}
#define EXT4_EXT_ZERO_LEN 7
/*
- * This function is called by ext4_ext_get_blocks() if someone tries to write
+ * This function is called by ext4_ext_map_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
* extent into multiple extents (upto three - one initialized and two
* uninitialized).
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
* c> Splits in three extents: Somone is writing in middle of the extent
*/
static int ext4_ext_convert_to_initialized(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t iblock,
- unsigned int max_blocks)
+ struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
{
struct ext4_extent *ex, newex, orig_ex;
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;
+ int may_zeroout;
+
+ ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+ "block %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)map->m_lblk, map->m_len);
+
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
+ if (eof_block < map->m_lblk + map->m_len)
+ eof_block = map->m_lblk + map->m_len;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
- allocated = ee_len - (iblock - ee_block);
- newblock = iblock - ee_block + ext_pblock(ex);
+ allocated = ee_len - (map->m_lblk - ee_block);
+ newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully insde i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= eof_block;
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
return allocated;
}
- /* ex1: ee_block to iblock - 1 : uninitialized */
- if (iblock > ee_block) {
+ /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+ if (map->m_lblk > ee_block) {
ex1 = ex;
- ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* we insert ex3, if ex1 is NULL. This is to avoid temporary
* overlap of blocks.
*/
- if (!ex1 && allocated > max_blocks)
- ex2->ee_len = cpu_to_le16(max_blocks);
+ if (!ex1 && allocated > map->m_len)
+ ex2->ee_len = cpu_to_le16(map->m_len);
/* ex3: to ee_block + ee_len : uninitialised */
- if (allocated > max_blocks) {
+ if (allocated > map->m_len) {
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
- if (allocated <= EXT4_EXT_ZERO_LEN) {
+ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
/*
- * iblock == ee_block is handled by the zerouout
+ * map->m_lblk == ee_block is handled by the zerouout
* at the beginning.
* Mark first half uninitialized.
* Mark second half initialized and zero out the
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_dirty(handle, inode, path + depth);
ex3 = &newex;
- ex3->ee_block = cpu_to_le32(iblock);
+ ex3->ee_block = cpu_to_le32(map->m_lblk);
ext4_ext_store_pblock(ex3, newblock);
ex3->ee_len = cpu_to_le16(allocated);
err = ext4_ext_insert_extent(handle, inode, path,
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
- /* blocks available from iblock */
+ /* blocks available from map->m_lblk */
return allocated;
} else if (err)
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
depth = ext_depth(inode);
ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
- iblock, path);
+ path = ext4_ext_find_extent(inode, map->m_lblk,
+ path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
return err;
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
return allocated;
}
ex3 = &newex;
- ex3->ee_block = cpu_to_le32(iblock + max_blocks);
- ext4_ext_store_pblock(ex3, newblock + max_blocks);
- ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+ ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+ ext4_ext_store_pblock(ex3, newblock + map->m_len);
+ ex3->ee_len = cpu_to_le16(allocated - map->m_len);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
- /* blocks available from iblock */
+ /* blocks available from map->m_lblk */
return allocated;
} else if (err)
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* update the extent length after successful insert of the
* split extent
*/
- orig_ex.ee_len = cpu_to_le16(ee_len -
- ext4_ext_get_actual_len(ex3));
+ ee_len -= ext4_ext_get_actual_len(ex3);
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ may_zeroout = ee_block + ee_len <= eof_block;
+
depth = newdepth;
ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, iblock, path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (err)
goto out;
- allocated = max_blocks;
+ allocated = map->m_len;
/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
* to insert a extent in the middle zerout directly
* otherwise give the extent a chance to merge to left
*/
if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
- iblock != ee_block) {
+ map->m_lblk != ee_block && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
- /* blocks available from iblock */
+ /* blocks available from map->m_lblk */
return allocated;
}
}
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
if (ex1 && ex1 != ex) {
ex1 = ex;
- ex1->ee_len = cpu_to_le16(iblock - ee_block);
+ ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
- /* ex2: iblock to iblock + maxblocks-1 : initialised */
- ex2->ee_block = cpu_to_le32(iblock);
+ /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
+ ex2->ee_block = cpu_to_le32(map->m_lblk);
ext4_ext_store_pblock(ex2, n