path: root/fs/ext4/ialloc.c
author:    Ingo Molnar <mingo@kernel.org>  2012-04-13 09:57:10 +0200
committer: Ingo Molnar <mingo@kernel.org>  2012-04-13 09:57:10 +0200
commit:    a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (patch)
tree:      a2c186cb828e3713c2ec48a4d7191166fb798b3d  /fs/ext4/ialloc.c
parent:    659c36fcda403013a01b85da07cf2d9711e6d6c7 (diff)
parent:    0034102808e0dbbf3a2394b82b1bb40b5778de9e (diff)
Merge tag 'v3.4-rc2' into perf/core
Merge Linux 3.4-rc2: we were on v3.3, update the base.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--  fs/ext4/ialloc.c | 260
1 file changed, 107 insertions(+), 153 deletions(-)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad..409c2ee7750 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
return EXT4_INODES_PER_GROUP(sb);
}
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+ if (uptodate) {
+ set_buffer_uptodate(bh);
+ set_bitmap_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
/*
* Read the inode allocation bitmap for a given block_group, reading
* into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
trace_ext4_load_inode_bitmap(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ, bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
put_bh(bh);
ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
+ "block_group = %u, inode_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
return bh;
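
The point of the new helper is ordering: the old path called set_bitmap_uptodate() before the read was even submitted, so a failed read could leave a bitmap cached as valid; ext4_end_bitmap_read() sets both uptodate bits only on successful completion, while the buffer lock is still held. A condensed sketch of the resulting read sequence, using only calls that appear in this hunk (the helper name read_bitmap_locked() is hypothetical, and the caller is assumed to already hold the buffer lock, as ext4_read_inode_bitmap() does):

	static struct buffer_head *read_bitmap_locked(struct buffer_head *bh)
	{
		bh->b_end_io = ext4_end_bitmap_read;
		get_bh(bh);			/* extra ref; dropped by the end_io handler */
		submit_bh(READ, bh);		/* completion handler unlocks the buffer */
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {	/* I/O failed: bitmap was never marked valid */
			put_bh(bh);		/* drop the caller's reference */
			return NULL;
		}
		return bh;			/* uptodate and bitmap-uptodate */
	}
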
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
- if (atomic_read(&inode->i_count) > 1) {
- printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ if (!sb) {
+ printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+ "nonexistent device\n", __func__, __LINE__);
return;
}
- if (inode->i_nlink) {
- printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
- inode->i_nlink);
+ if (atomic_read(&inode->i_count) > 1) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+ __func__, __LINE__, inode->i_ino,
+ atomic_read(&inode->i_count));
return;
}
- if (!sb) {
- printk(KERN_ERR "ext4_free_inode: inode on "
- "nonexistent device\n");
+ if (inode->i_nlink) {
+ ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+ __func__, __LINE__, inode->i_ino, inode->i_nlink);
return;
}
sbi = EXT4_SB(sb);
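
The reordering here is not cosmetic: the count and nlink complaints now go through ext4_msg(), which dereferences sb to prefix the device name, so the NULL-superblock check has to run before either of them. A minimal sketch of the invariant, condensed from the hunk above (not a verbatim excerpt):

	if (!sb) {	/* must come first ... */
		printk(KERN_ERR "EXT4-fs: inode on nonexistent device\n");
		return;
	}
	/* ... because everything below dereferences sb via ext4_msg() */
	if (atomic_read(&inode->i_count) > 1) {
		ext4_msg(sb, KERN_ERR, "inode #%lu: count=%d",
			 inode->i_ino, atomic_read(&inode->i_count));
		return;
	}
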
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
}
/*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
- struct buffer_head *inode_bitmap_bh,
- unsigned long ino, ext4_group_t group, umode_t mode)
-{
- int free = 0, retval = 0, count;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
- /*
- * We have to be sure that new inode allocation does not race with
- * inode table initialization, because otherwise we may end up
- * allocating and writing new inode right before sb_issue_zeroout
- * takes place and overwriting our new inode with zeroes. So we
- * take alloc_sem to prevent it.
- */
- down_read(&grp->alloc_sem);
- ext4_lock_group(sb, group);
- if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
- /* not a free inode */
- retval = 1;
- goto err_ret;
- }
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- ext4_error(sb, "reserved inode or inode > inodes count - "
- "block_group = %u, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- return 1;
- }
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
-
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp);
- }
-
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
- */
- if (ino > free)
- ext4_itable_unused_set(sb, gdp,
- (EXT4_INODES_PER_GROUP(sb) - ino));
- }
- count = ext4_free_inodes_count(sb, gdp) - 1;
- ext4_free_inodes_set(sb, gdp, count);
- if (S_ISDIR(mode)) {
- count = ext4_used_dirs_count(sb, gdp) + 1;
- ext4_used_dirs_set(sb, gdp, count);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, group);
-
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
- }
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
- ext4_unlock_group(sb, group);
- up_read(&grp->alloc_sem);
- return retval;
-}
-
-/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +664,11 @@ got_group:
if (ret2 == -1)
goto out;
+ /*
+ * Normally we will only go through one pass of this loop,
+ * unless we get unlucky and it turns out the group we selected
+ * had its last inode grabbed by someone else.
+ */
for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO;
@@ -757,51 +685,24 @@ repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
-
- if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- inode_bitmap_bh);
- if (err)
- goto fail;
-
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle,
- group_desc_bh);
- if (err)
- goto fail;
- if (!ext4_claim_inode(sb, inode_bitmap_bh,
- ino, group, mode)) {
- /* we won it */
- BUFFER_TRACE(inode_bitmap_bh,
- "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle,
- NULL,
- inode_bitmap_bh);
- if (err)
- goto fail;
- /* zero bit is inode number 1*/
- ino++;
- goto got;
- }
- /* we lost it */
- ext4_handle_release_buffer(handle, inode_bitmap_bh);
- ext4_handle_release_buffer(handle, group_desc_bh);
-
- if (++ino < EXT4_INODES_PER_GROUP(sb))
- goto repeat_in_this_group;
+ if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+ if (++group == ngroups)
+ group = 0;
+ continue;
}
-
- /*
- * This case is possible in concurrent environment. It is very
- * rare. We cannot repeat the find_group_xxx() call because
- * that will simply return the same blockgroup, because the
- * group descriptor metadata has not yet been updated.
- * So we just go onto the next blockgroup.
- */
- if (++group == ngroups)
- group = 0;
+ if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+ ext4_error(sb, "reserved inode found cleared - "
+ "inode=%lu", ino + 1);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ ext4_unlock_group(sb, group);
+ ino++; /* the inode bitmap is zero-based */
+ if (!ret2)
+ goto got; /* we grabbed the inode! */
+ if (ino < EXT4_INODES_PER_GROUP(sb))
+ goto repeat_in_this_group;
}
err = -ENOSPC;
goto out;
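
This loop is the heart of the fold: rather than taking journal write access on both buffers up front and then calling ext4_claim_inode() (losing the race meant the ext4_handle_release_buffer() calls removed above), the new code claims the bit first with a test-and-set under the group spinlock, and only dirties metadata once it has won. A condensed sketch of the claim step, with the group-advance bookkeeping trimmed (the calls are the ones in the hunk; comments added here as annotation):

	repeat_in_this_group:
		ino = ext4_find_next_zero_bit((unsigned long *)
					      inode_bitmap_bh->b_data,
					      EXT4_INODES_PER_GROUP(sb), ino);
		ext4_lock_group(sb, group);	/* serialize racing allocators */
		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
		ext4_unlock_group(sb, group);
		ino++;			/* bitmap is zero-based; inode numbers start at 1 */
		if (!ret2)
			goto got;	/* bit was clear: this inode is ours */
		if (ino < EXT4_INODES_PER_GROUP(sb))
			goto repeat_in_this_group;	/* lost the race; try the next bit */
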
@@ -838,6 +739,59 @@ got:
if (err)
goto fail;
}
+
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err)
+ goto fail;
+
+ /* Update the relevant bg descriptor fields */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ int free;
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+ down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+ ext4_lock_group(sb, group); /* while we modify the bg desc */
+ free = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+ free = 0;
+ }
+ /*
+ * Check the relative inode number against the last used
+	 * relative inode number in this group. If it is greater
+ * we need to update the bg_itable_unused count
+ */
+ if (ino > free)
+ ext4_itable_unused_set(sb, gdp,
+ (EXT4_INODES_PER_GROUP(sb) - ino));
+ up_read(&grp->alloc_sem);
+ }
+ ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+ if (S_ISDIR(mode)) {
+ ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t f = ext4_flex_group(sbi, group);
+
+ atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+ }
+ }
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ ext4_unlock_group(sb, group);
+ }
+
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
if (err)
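
The bg_itable_unused bookkeeping carried over from the old ext4_claim_inode() is easiest to follow with concrete numbers. A hypothetical worked example (all values invented for illustration):

	/*
	 * Say EXT4_INODES_PER_GROUP(sb) == 8192 and the descriptor has
	 * bg_itable_unused == 8000, i.e. only the first 192 slots of
	 * the inode table have ever been used:
	 */
	free = 8192 - 8000;		/* = 192, the last used relative inode */

	/* Claiming relative inode ino == 200 lands past that region
	 * (the "ino > free" test above), so the unused tail shrinks: */
	ext4_itable_unused_set(sb, gdp, 8192 - 200);	/* = 7992 */
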
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
* where it is called from on active part of filesystem is ext4lazyinit
* thread, so we do not need any special locks, however we have to prevent
* inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
*/
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
sbi->s_inodes_per_block);
if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
- ext4_error(sb, "Something is wrong with group %u\n"
- "Used itable blocks: %d"
- "itable unused count: %u\n",
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
group, used_blks,
ext4_itable_unused_count(sb, gdp));
ret = 1;
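
For context, the used_blks being checked here derives from the same bg_itable_unused field, rounded up to whole inode-table blocks. A hypothetical worked example of the bound (numbers invented; s_itb_per_group is the group's inode-table size in blocks):

	/*
	 * With 8192 inodes per group and 16 inodes per block, the inode
	 * table spans s_itb_per_group = 8192 / 16 = 512 blocks. Given
	 * bg_itable_unused == 8000:
	 */
	used_blks = DIV_ROUND_UP(8192 - 8000, 16);	/* = 12 blocks in use */

	/*
	 * 0 <= 12 <= 512, so this group passes; a value outside that
	 * range trips the ext4_error() above and aborts the lazy init.
	 */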