aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c537
1 files changed, 330 insertions, 207 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad..5b87fc36aab 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -70,28 +70,49 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
+ struct ext4_group_info *grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
-
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
- if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
- ext4_free_group_clusters_set(sb, gdp, 0);
- ext4_free_inodes_set(sb, gdp, 0);
- ext4_itable_unused_set(sb, gdp, 0);
- memset(bh->b_data, 0xff, sb->s_blocksize);
+ grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
+ set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
+ set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return 0;
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
+ ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
+ EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_group_desc_csum_set(sb, block_group, gdp);
return EXT4_INODES_PER_GROUP(sb);
}
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+ if (uptodate) {
+ set_buffer_uptodate(bh);
+ set_bitmap_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
/*
* Read the inode allocation bitmap for a given block_group, reading
* into the specified slot in the superblock's bitmap cache.
@@ -104,6 +125,8 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
+ struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -118,12 +141,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return NULL;
}
if (bitmap_uptodate(bh))
- return bh;
+ goto verify;
lock_buffer(bh);
if (bitmap_uptodate(bh)) {
unlock_buffer(bh);
- return bh;
+ goto verify;
}
ext4_lock_group(sb, block_group);
@@ -131,6 +154,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
ext4_init_inode_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
+ set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
return bh;
@@ -144,23 +168,45 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
*/
set_bitmap_uptodate(bh);
unlock_buffer(bh);
- return bh;
+ goto verify;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
trace_ext4_load_inode_bitmap(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ | REQ_META | REQ_PRIO, bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
put_bh(bh);
ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
+ "block_group = %u, inode_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
+
+verify:
+ ext4_lock_group(sb, block_group);
+ if (!buffer_verified(bh) &&
+ !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
+ EXT4_INODES_PER_GROUP(sb) / 8)) {
+ ext4_unlock_group(sb, block_group);
+ put_bh(bh);
+ ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
+ "inode_bitmap = %llu", block_group, bitmap_blk);
+ grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, desc);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
+ set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+ return NULL;
+ }
+ ext4_unlock_group(sb, block_group);
+ set_buffer_verified(bh);
return bh;
}
@@ -193,20 +239,22 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
+ struct ext4_group_info *grp;
- if (atomic_read(&inode->i_count) > 1) {
- printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ if (!sb) {
+ printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+ "nonexistent device\n", __func__, __LINE__);
return;
}
- if (inode->i_nlink) {
- printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
- inode->i_nlink);
+ if (atomic_read(&inode->i_count) > 1) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+ __func__, __LINE__, inode->i_ino,
+ atomic_read(&inode->i_count));
return;
}
- if (!sb) {
- printk(KERN_ERR "ext4_free_inode: inode on "
- "nonexistent device\n");
+ if (inode->i_nlink) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+ __func__, __LINE__, inode->i_ino, inode->i_nlink);
return;
}
sbi = EXT4_SB(sb);
@@ -237,7 +285,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
- if (!bitmap_bh)
+ /* Don't bother if the inode bitmap is corrupt. */
+ grp = ext4_get_group_info(sb, block_group);
+ if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
goto error_return;
BUFFER_TRACE(bitmap_bh, "get_write_access");
@@ -265,7 +315,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ext4_used_dirs_set(sb, gdp, count);
percpu_counter_dec(&sbi->s_dirs_counter);
}
- gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+ ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+ EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_inc(&sbi->s_freeinodes_counter);
@@ -284,9 +336,16 @@ out:
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
- ext4_mark_super_dirty(sb);
- } else
+ } else {
ext4_error(sb, "bit already cleared for inode %lu", ino);
+ if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
+ set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+ }
error_return:
brelse(bitmap_bh);
@@ -294,8 +353,8 @@ error_return:
}
struct orlov_stats {
+ __u64 free_clusters;
__u32 free_inodes;
- __u32 free_clusters;
__u32 used_dirs;
};
@@ -312,7 +371,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
if (flex_size > 1) {
stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
- stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
+ stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
return;
}
@@ -396,7 +455,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
- get_random_bytes(&grp, sizeof(grp));
+ grp = prandom_u32();
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
@@ -477,10 +536,12 @@ fallback_retry:
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
- grp_free = ext4_free_inodes_count(sb, desc);
- if (desc && grp_free && grp_free >= avefreei) {
- *group = grp;
- return 0;
+ if (desc) {
+ grp_free = ext4_free_inodes_count(sb, desc);
+ if (grp_free && grp_free >= avefreei) {
+ *group = grp;
+ return 0;
+ }
}
}
@@ -593,91 +654,48 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
}
/*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
+ * In no journal mode, if an inode has recently been deleted, we want
+ * to avoid reusing it until we're reasonably sure the inode table
+ * block has been written back to disk. (Yes, these values are
+ * somewhat arbitrary...)
*/
-static int ext4_claim_inode(struct super_block *sb,
- struct buffer_head *inode_bitmap_bh,
- unsigned long ino, ext4_group_t group, umode_t mode)
-{
- int free = 0, retval = 0, count;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
- /*
- * We have to be sure that new inode allocation does not race with
- * inode table initialization, because otherwise we may end up
- * allocating and writing new inode right before sb_issue_zeroout
- * takes place and overwriting our new inode with zeroes. So we
- * take alloc_sem to prevent it.
- */
- down_read(&grp->alloc_sem);
- ext4_lock_group(sb, group);
- if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
- /* not a free inode */
- retval = 1;
- goto err_ret;
- }
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- ext4_error(sb, "reserved inode or inode > inodes count - "
- "block_group = %u, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- return 1;
- }
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
+#define RECENTCY_MIN 5
+#define RECENTCY_DIRTY 30
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp);
- }
+static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
+{
+ struct ext4_group_desc *gdp;
+ struct ext4_inode *raw_inode;
+ struct buffer_head *bh;
+ unsigned long dtime, now;
+ int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ int offset, ret = 0, recentcy = RECENTCY_MIN;
+
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (unlikely(!gdp))
+ return 0;
+ bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
+ (ino / inodes_per_block));
+ if (unlikely(!bh) || !buffer_uptodate(bh))
/*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
+ * If the block is not in the buffer cache, then it
+ * must have been written out.
*/
- if (ino > free)
- ext4_itable_unused_set(sb, gdp,
- (EXT4_INODES_PER_GROUP(sb) - ino));
- }
- count = ext4_free_inodes_count(sb, gdp) - 1;
- ext4_free_inodes_set(sb, gdp, count);
- if (S_ISDIR(mode)) {
- count = ext4_used_dirs_count(sb, gdp) + 1;
- ext4_used_dirs_set(sb, gdp, count);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, group);
+ goto out;
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
- }
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- return retval;
+ offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
+ raw_inode = (struct ext4_inode *) (bh->b_data + offset);
+ dtime = le32_to_cpu(raw_inode->i_dtime);
+ now = get_seconds();
+ if (buffer_dirty(bh))
+ recentcy += RECENTCY_DIRTY;
+
+ if (dtime && (dtime < now) && (now < dtime + recentcy))
+ ret = 1;
+out:
+ brelse(bh);
+ return ret;
}
/*
@@ -690,8 +708,10 @@ err_ret:
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
- const struct qstr *qstr, __u32 goal, uid_t *owner)
+struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
+ umode_t mode, const struct qstr *qstr,
+ __u32 goal, uid_t *owner, int handle_type,
+ unsigned int line_no, int nblocks)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -706,6 +726,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
struct inode *ret;
ext4_group_t i;
ext4_group_t flex_group;
+ struct ext4_group_info *grp;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -720,6 +741,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
ei = EXT4_I(inode);
sbi = EXT4_SB(sb);
+ /*
+ * Initialize owners and quota early so that we don't have to account
+ * for quota initialization worst case in standard inode creating
+ * transaction
+ */
+ if (owner) {
+ inode->i_mode = mode;
+ i_uid_write(inode, owner[0]);
+ i_gid_write(inode, owner[1]);
+ } else if (test_opt(sb, GRPID)) {
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = dir->i_gid;
+ } else
+ inode_init_owner(inode, dir, mode);
+ dquot_initialize(inode);
+
if (!goal)
goal = sbi->s_inode_goal;
@@ -741,65 +779,87 @@ got_group:
if (ret2 == -1)
goto out;
+ /*
+ * Normally we will only go through one pass of this loop,
+ * unless we get unlucky and it turns out the group we selected
+ * had its last inode grabbed by someone else.
+ */
for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO;
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp)
- goto fail;
+ goto out;
+
+ /*
+ * Check free inodes count before loading bitmap.
+ */
+ if (ext4_free_inodes_count(sb, gdp) == 0) {
+ if (++group == ngroups)
+ group = 0;
+ continue;
+ }
+
+ grp = ext4_get_group_info(sb, group);
+ /* Skip groups with already-known suspicious inode tables */
+ if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ if (++group == ngroups)
+ group = 0;
+ continue;
+ }
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
- if (!inode_bitmap_bh)
- goto fail;
+ /* Skip groups with suspicious inode tables */
+ if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
+ if (++group == ngroups)
+ group = 0;
+ continue;
+ }
repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
-
- if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- inode_bitmap_bh);
- if (err)
- goto fail;
-
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- group_desc_bh);
- if (err)
- goto fail;
- if (!ext4_claim_inode(sb, inode_bitmap_bh,
- ino, group, mode)) {
- /* we won it */
- BUFFER_TRACE(inode_bitmap_bh,
- "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle,
- NULL,
- inode_bitmap_bh);
- if (err)
- goto fail;
- /* zero bit is inode number 1*/
- ino++;
- goto got;
+ if (ino >= EXT4_INODES_PER_GROUP(sb))
+ goto next_group;
+ if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+ ext4_error(sb, "reserved inode found cleared - "
+ "inode=%lu", ino + 1);
+ continue;
+ }
+ if ((EXT4_SB(sb)->s_journal == NULL) &&
+ recently_deleted(sb, group, ino)) {
+ ino++;
+ goto next_inode;
+ }
+ if (!handle) {
+ BUG_ON(nblocks <= 0);
+ handle = __ext4_journal_start_sb(dir->i_sb, line_no,
+ handle_type, nblocks,
+ 0);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ ext4_std_error(sb, err);
+ goto out;
}
- /* we lost it */
- ext4_handle_release_buffer(handle, inode_bitmap_bh);
- ext4_handle_release_buffer(handle, group_desc_bh);
-
- if (++ino < EXT4_INODES_PER_GROUP(sb))
- goto repeat_in_this_group;
}
-
- /*
- * This case is possible in concurrent environment. It is very
- * rare. We cannot repeat the find_group_xxx() call because
- * that will simply return the same blockgroup, because the
- * group descriptor metadata has not yet been updated.
- * So we just go onto the next blockgroup.
- */
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+ ext4_lock_group(sb, group);
+ ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ ext4_unlock_group(sb, group);
+ ino++; /* the inode bitmap is zero-based */
+ if (!ret2)
+ goto got; /* we grabbed the inode! */
+next_inode:
+ if (ino < EXT4_INODES_PER_GROUP(sb))
+ goto repeat_in_this_group;
+next_group:
if (++group == ngroups)
group = 0;
}
@@ -807,8 +867,22 @@ repeat_in_this_group:
goto out;
got:
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
/* We may have to initialize the block bitmap if it isn't already */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+ if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
struct buffer_head *block_bitmap_bh;
@@ -817,12 +891,12 @@ got:
err = ext4_journal_get_write_access(handle, block_bitmap_bh);
if (err) {
brelse(block_bitmap_bh);
- goto fail;
+ ext4_std_error(sb, err);
+ goto out;
}
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
- brelse(block_bitmap_bh);
/* recheck and clear flag under lock if we still need to */
ext4_lock_group(sb, group);
@@ -830,38 +904,76 @@ got:
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
- gdp);
+ ext4_block_bitmap_csum_set(sb, group, gdp,
+ block_bitmap_bh);
+ ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
+ brelse(block_bitmap_bh);
- if (err)
- goto fail;
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
}
+
+ /* Update the relevant bg descriptor fields */
+ if (ext4_has_group_desc_csum(sb)) {
+ int free;
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+ down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+ ext4_lock_group(sb, group); /* while we modify the bg desc */
+ free = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+ free = 0;
+ }
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ */
+ if (ino > free)
+ ext4_itable_unused_set(sb, gdp,
+ (EXT4_INODES_PER_GROUP(sb) - ino));
+ up_read(&grp->alloc_sem);
+ } else {
+ ext4_lock_group(sb, group);
+ }
+
+ ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+ if (S_ISDIR(mode)) {
+ ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t f = ext4_flex_group(sbi, group);
+
+ atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+ }
+ }
+ if (ext4_has_group_desc_csum(sb)) {
+ ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+ EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_group_desc_csum_set(sb, group, gdp);
+ }
+ ext4_unlock_group(sb, group);
+
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
- if (err)
- goto fail;
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
- ext4_mark_super_dirty(sb);
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
}
- if (owner) {
- inode->i_mode = mode;
- inode->i_uid = owner[0];
- inode->i_gid = owner[1];
- } else if (test_opt(sb, GRPID)) {
- inode->i_mode = mode;
- inode->i_uid = current_fsuid();
- inode->i_gid = dir->i_gid;
- } else
- inode_init_owner(inode, dir, mode);
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
@@ -890,19 +1002,36 @@ got:
* twice.
*/
err = -EIO;
- goto fail;
+ ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
+ inode->i_ino);
+ goto out;
}
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
+ /* Precompute checksum seed for inode metadata */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ __u32 csum;
+ __le32 inum = cpu_to_le32(inode->i_ino);
+ __le32 gen = cpu_to_le32(inode->i_generation);
+ csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
+ sizeof(inum));
+ ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
+ sizeof(gen));
+ }
+
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
+ ei->i_inline_off = 0;
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
ret = inode;
- dquot_initialize(inode);
err = dquot_alloc_inode(inode);
if (err)
goto fail_drop;
@@ -936,24 +1065,17 @@ got:
ext4_debug("allocating inode %lu\n", inode->i_ino);
trace_ext4_allocate_inode(inode, dir, mode);
- goto really_out;
-fail:
- ext4_std_error(sb, err);
-out:
- iput(inode);
- ret = ERR_PTR(err);
-really_out:
brelse(inode_bitmap_bh);
return ret;
fail_free_drop:
dquot_free_inode(inode);
-
fail_drop:
- dquot_drop(inode);
- inode->i_flags |= S_NOQUOTA;
clear_nlink(inode);
unlock_new_inode(inode);
+out:
+ dquot_drop(inode);
+ inode->i_flags |= S_NOQUOTA;
iput(inode);
brelse(inode_bitmap_bh);
return ERR_PTR(err);
@@ -1012,17 +1134,17 @@ iget_failed:
inode = NULL;
bad_orphan:
ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
- printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+ printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
bit, (unsigned long long)bitmap_bh->b_blocknr,
ext4_test_bit(bit, bitmap_bh->b_data));
- printk(KERN_NOTICE "inode=%p\n", inode);
+ printk(KERN_WARNING "inode=%p\n", inode);
if (inode) {
- printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+ printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
is_bad_inode(inode));
- printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+ printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
- printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
- printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
+ printk(KERN_WARNING "max_ino=%lu\n", max_ino);
+ printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
@@ -1057,7 +1179,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
if (!bitmap_bh)
continue;
- x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
+ x = ext4_count_free(bitmap_bh->b_data,
+ EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x;
@@ -1101,7 +1224,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
* where it is called from on active part of filesystem is ext4lazyinit
* thread, so we do not need any special locks, however we have to prevent
* inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
*/
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int barrier)
@@ -1131,7 +1254,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
goto out;
- handle = ext4_journal_start_sb(sb, 1);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -1149,9 +1272,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
sbi->s_inodes_per_block);
if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
- ext4_error(sb, "Something is wrong with group %u\n"
- "Used itable blocks: %d"
- "itable unused count: %u\n",
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
group, used_blks,
ext4_itable_unused_count(sb, gdp));
ret = 1;
@@ -1186,7 +1309,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
skip_zeroout:
ext4_lock_group(sb, group);
gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ ext4_group_desc_csum_set(sb, group, gdp);
ext4_unlock_group(sb, group);
BUFFER_TRACE(group_desc_bh,