aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-08-03 10:50:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-08-03 10:50:44 -0700
commit8f616cd5249e03c9e1b371623d85e76d4b86bbc1 (patch)
tree94cbd990ac45743ac75203a7e1bd6a8fb63c5bc2 /fs
parent7e31aa11fc672bbe0dd0da59513c9efe3809ced7 (diff)
parent7d55992d60caa390460bad1a974eb2b3c11538f4 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: remove write-only variables from ext4_ordered_write_end ext4: unexport jbd2_journal_update_superblock ext4: Cleanup whitespace and other miscellaneous style issues ext4: improve ext4_fill_flex_info() a bit ext4: Cleanup the block reservation code path ext4: don't assume extents can't cross block groups when truncating ext4: Fix lack of credits BUG() when deleting a badly fragmented inode ext4: Fix ext4_ext_journal_restart() ext4: fix ext4_da_write_begin error path jbd2: don't abort if flushing file data failed ext4: don't read inode block if the buffer has a write error ext4: Don't allow lg prealloc list to be grow large. ext4: Convert the usage of NR_CPUS to nr_cpu_ids. ext4: Improve error handling in mballoc ext4: lock block groups when initializing ext4: sync up block and inode bitmap reading functions ext4: Allow read/only mounts with corrupted block group checksums ext4: Fix data corruption when writing to prealloc area
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/acl.c188
-rw-r--r--fs/ext4/balloc.c11
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/extents.c55
-rw-r--r--fs/ext4/ialloc.c58
-rw-r--r--fs/ext4/inode.c164
-rw-r--r--fs/ext4/mballoc.c254
-rw-r--r--fs/ext4/mballoc.h10
-rw-r--r--fs/ext4/resize.c79
-rw-r--r--fs/ext4/super.c316
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/jbd2/commit.c24
-rw-r--r--fs/jbd2/journal.c1
13 files changed, 729 insertions, 434 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e16544..694ed6fadcc 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
- for (n=0; n < count; n++) {
+ for (n = 0; n < count; n++) {
ext4_acl_entry *entry =
(ext4_acl_entry *)value;
if ((char *)value + sizeof(ext4_acl_entry_short) > end)
goto fail;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- value = (char *)value +
- sizeof(ext4_acl_entry_short);
- acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
- break;
-
- case ACL_USER:
- case ACL_GROUP:
- value = (char *)value + sizeof(ext4_acl_entry);
- if ((char *)value > end)
- goto fail;
- acl->a_entries[n].e_id =
- le32_to_cpu(entry->e_id);
- break;
-
- default:
+
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ value = (char *)value +
+ sizeof(ext4_acl_entry_short);
+ acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ value = (char *)value + sizeof(ext4_acl_entry);
+ if ((char *)value > end)
goto fail;
+ acl->a_entries[n].e_id =
+ le32_to_cpu(entry->e_id);
+ break;
+
+ default:
+ goto fail;
}
}
if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
e = (char *)ext_acl + sizeof(ext4_acl_header);
- for (n=0; n < acl->a_count; n++) {
+ for (n = 0; n < acl->a_count; n++) {
ext4_acl_entry *entry = (ext4_acl_entry *)e;
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER:
- case ACL_GROUP:
- entry->e_id =
- cpu_to_le32(acl->a_entries[n].e_id);
- e += sizeof(ext4_acl_entry);
- break;
-
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- e += sizeof(ext4_acl_entry_short);
- break;
-
- default:
- goto fail;
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+ e += sizeof(ext4_acl_entry);
+ break;
+
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ e += sizeof(ext4_acl_entry_short);
+ break;
+
+ default:
+ goto fail;
}
}
return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
if (!test_opt(inode->i_sb, POSIX_ACL))
return NULL;
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = ext4_iget_acl(inode, &ei->i_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- acl = ext4_iget_acl(inode, &ei->i_default_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- break;
-
- default:
- return ERR_PTR(-EINVAL);
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ acl = ext4_iget_acl(inode, &ei->i_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ acl = ext4_iget_acl(inode, &ei->i_default_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+
+ default:
+ return ERR_PTR(-EINVAL);
}
retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
kfree(value);
if (!IS_ERR(acl)) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- switch(type) {
- case ACL_TYPE_ACCESS:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
- return error;
- else {
- inode->i_mode = mode;
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ return error;
+ else {
+ inode->i_mode = mode;
+ ext4_mark_inode_dirty(handle, inode);
+ if (error == 0)
+ acl = NULL;
}
- break;
+ }
+ break;
- case ACL_TYPE_DEFAULT:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
+ case ACL_TYPE_DEFAULT:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
- default:
- return -EINVAL;
+ default:
+ return -EINVAL;
}
if (acl) {
value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
kfree(value);
if (!error) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b983..1ae5004e93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
if (unlikely(!bh)) {
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
ext4_valid_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b1..6c7924d9e35 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
/* inode.c */
-void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892e..612c3d2c382 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
- if (err)
+ if (err <= 0)
return err;
return ext4_journal_restart(handle, needed);
}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
/*
* get the next allocated block if the extent in the path
- * is before the requested block(s)
+ * is before the requested block(s)
*/
if (b2 < b1) {
b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
BUG_ON(b != ex_ee_block + ex_ee_len - 1);
}
- /* at present, extent can't cross block group: */
- /* leaf + bitmap + group desc + sb + inode */
- credits = 5;
+ /*
+ * 3 for leaf, sb, and inode plus 2 (bmap and group
+ * descriptor) for each block group; assume two block
+ * groups plus ex_ee_len/blocks_per_block_group for
+ * the worst case
+ */
+ credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
if (ex == EXT_FIRST_EXTENT(eh)) {
correct_index = 1;
credits += (ext_depth(inode)) + 1;
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
if (allocated <= EXT4_EXT_ZERO_LEN) {
- /* Mark first half uninitialized.
+ /*
+ * iblock == ee_block is handled by the zerouout
+ * at the beginning.
+ * Mark first half uninitialized.
* Mark second half initialized and zero out the
* initialized extent
*/
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = PTR_ERR(path);
return err;
}
+ /* get the second half extent details */
ex = path[depth].p_ext;
err = ext4_ext_get_access(handle, inode,
path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
orig_ex.ee_len = cpu_to_le16(ee_len -
ext4_ext_get_actual_len(ex3));
- if (newdepth != depth) {
- depth = newdepth;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, iblock, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
- if (ex2 != &newex)
- ex2 = ex;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
+ depth = newdepth;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
}
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ if (ex2 != &newex)
+ ex2 = ex;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
allocated = max_blocks;
/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
+ /* blocks available from iblock */
return allocated;
}
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344..655e760212b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
* Return buffer_head of bitmap on success or NULL.
*/
static struct buffer_head *
-read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
+ ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
- goto error_out;
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
- if (!buffer_uptodate(bh)) {
- lock_buffer(bh);
- if (!buffer_uptodate(bh)) {
- ext4_init_inode_bitmap(sb, bh, block_group,
- desc);
- set_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
- }
- } else {
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ return NULL;
+ bitmap_blk = ext4_inode_bitmap(sb, desc);
+ bh = sb_getblk(sb, bitmap_blk);
+ if (unlikely(!bh)) {
+ ext4_error(sb, __func__,
+ "Cannot read inode bitmap - "
+ "block_group = %lu, inode_bitmap = %llu",
+ block_group, bitmap_blk);
+ return NULL;
}
- if (!bh)
- ext4_error(sb, "read_inode_bitmap",
+ if (bh_uptodate_or_lock(bh))
+ return bh;
+
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ ext4_init_inode_bitmap(sb, bh, block_group, desc);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ return bh;
+ }
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (bh_submit_read(bh) < 0) {
+ put_bh(bh);
+ ext4_error(sb, __func__,
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %llu",
- block_group, ext4_inode_bitmap(sb, desc));
-error_out:
+ block_group, bitmap_blk);
+ return NULL;
+ }
return bh;
}
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
@@ -623,7 +633,7 @@ got_group:
goto fail;
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail;
@@ -728,7 +738,7 @@ got:
/* When marking the block group with
* ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unsed even though
+ * on the value of bg_itable_unused even though
* mke2fs could have initialized the same for us.
* Instead we calculated the value below
*/
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh) {
ext4_warning(sb, __func__,
"inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, i);
+ bitmap_bh = ext4_read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c23..59fbbe899ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
void ext4_delete_inode (struct inode * inode)
{
handle_t *handle;
+ int err;
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
if (is_bad_inode(inode))
goto no_delete;
- handle = start_transaction(inode);
+ handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
+ ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
* If we're going to skip the normal cleanup, we still need to
* make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
if (IS_SYNC(inode))
handle->h_sync = 1;
inode->i_size = 0;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't mark inode dirty (err %d)", err);
+ goto stop_handle;
+ }
if (inode->i_blocks)
ext4_truncate(inode);
+
+ /*
+ * ext4_ext_truncate() doesn't reserve any slop when it
+ * restarts journal transactions; therefore there may not be
+ * enough credits left in the handle to remove the inode from
+ * the orphan list and set the dtime field.
+ */
+ if (handle->h_buffer_credits < 3) {
+ err = ext4_journal_extend(handle, 3);
+ if (err > 0)
+ err = ext4_journal_restart(handle, 3);
+ if (err != 0) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't extend journal (err %d)", err);
+ stop_handle:
+ ext4_journal_stop(handle);
+ goto no_delete;
+ }
+ }
+
/*
* Kill off the orphan record which ext4_truncate created.
* AKPM: I think this can be inside the above `if'.
@@ -952,6 +980,67 @@ out:
return err;
}
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ int ind_blks, dind_blks, tind_blks;
+
+ /* number of new indirect blocks needed */
+ ind_blks = (blocks + icap - 1) / icap;
+
+ dind_blks = (ind_blks + icap - 1) / icap;
+
+ tind_blks = 1;
+
+ return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_calc_metadata_amount(inode, blocks);
+
+ return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, mdb_free;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ /* recalculate the number of metablocks still need to be reserved */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used;
+ mdb = ext4_calc_metadata_amount(inode, total);
+
+ /* figure out how many metablocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* update fs free blocks counter for truncate case */
+ percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+
+ /* update per-inode reservations */
+ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
/*
@@ -965,10 +1054,9 @@ out:
/*
+ * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * and returns if the blocks are already mapped.
*
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
* Otherwise it takes the write lock of the i_data_sem and allocate blocks
* and store the allocated blocks in the result buffer head and mark it
* mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
* which were deferred till now
*/
if ((retval > 0) && buffer_delay(bh))
- ext4_da_release_space(inode, retval, 0);
+ ext4_da_update_reserve_space(inode, retval);
}
up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
- unsigned from, to;
int ret = 0, ret2;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
-
- dind_blks = (ind_blks + icap - 1) / icap;
-
- tind_blks = 1;
-
- return ind_blks + dind_blks + tind_blks;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
-
- return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC;
}
-
/* reduce fs free blocks counter */
percpu_counter_sub(&sbi->s_freeblocks_counter, total);
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
return 0; /* success */
}
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free, release;
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
/* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
release = to_free + mdb_free;
/* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, release);
/* update per-inode reservations */
- BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= to_free;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- ext4_da_release_space(page->mapping->host, 0, to_release);
+ ext4_da_release_space(page->mapping->host, to_release);
}
/*
@@ -2280,8 +2329,11 @@ retry:
}
page = __grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ ext4_journal_stop(handle);
+ ret = -ENOMEM;
+ goto out;
+ }
*pagep = page;
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbe..865e9ddb44d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if (bh_uptodate_or_lock(bh[i]))
continue;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i],
first_group + i, desc);
set_buffer_uptodate(bh[i]);
unlock_buffer(bh[i]);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
continue;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
get_bh(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
+ unsigned i, j;
unsigned offset;
unsigned max;
int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
- i = sizeof(struct ext4_locality_group) * NR_CPUS;
+ i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
if (sbi->s_locality_groups == NULL) {
clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
kfree(sbi->s_mb_maxs);
return -ENOMEM;
}
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
struct ext4_locality_group *lg;
lg = &sbi->s_locality_groups[i];
mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
+ for (j = 0; j < PREALLOC_TB_SIZE; j++)
+ INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
spin_lock_init(&lg->lg_prealloc_lock);
}
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa)
{
unsigned int len = ac->ac_o_ex.fe_len;
+
ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
&ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
+ int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
if (lg == NULL)
return 0;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_group_pa(ac, pa);
+ order = fls(ac->ac_o_ex.fe_len) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+
+ for (i = order; i < PREALLOC_TB_SIZE; i++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (pa->pa_deleted == 0 &&
+ pa->pa_free >= ac->ac_o_ex.fe_len) {
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_group_pa(ac, pa);
+ spin_unlock(&pa->pa_lock);
+ ac->ac_criteria = 20;
+ rcu_read_unlock();
+ return 1;
+ }
spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 20;
- rcu_read_unlock();
- return 1;
}
- spin_unlock(&pa->pa_lock);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
return 0;
}
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- spin_lock(pa->pa_obj_lock);
- list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
-
+ /*
+ * We will later add the new pa to the right bucket
+ * after updating the pa_free in ext4_mb_release_context
+ */
return 0;
}
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) {
- /* error handling here */
- ext4_mb_release_desc(&e4b);
- BUG_ON(bitmap_bh == NULL);
+ ext4_error(sb, __func__, "Error in reading block "
+ "bitmap for %lu\n", group);
+ return 0;
}
err = ext4_mb_load_buddy(sb, group, &e4b);
- BUG_ON(err != 0); /* error handling here */
+ if (err) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ put_bh(bitmap_bh);
+ return 0;
+ }
if (needed == 0)
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
- grp