diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-04-13 09:57:10 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-04-13 09:57:10 +0200 |
commit | a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (patch) | |
tree | a2c186cb828e3713c2ec48a4d7191166fb798b3d /fs/ext4/ialloc.c | |
parent | 659c36fcda403013a01b85da07cf2d9711e6d6c7 (diff) | |
parent | 0034102808e0dbbf3a2394b82b1bb40b5778de9e (diff) |
Merge tag 'v3.4-rc2' into perf/core
Merge Linux 3.4-rc2: we were on v3.3, update the base.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 260 |
1 files changed, 107 insertions, 153 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25d8c9781ad..409c2ee7750 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, return EXT4_INODES_PER_GROUP(sb); } +void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) +{ + if (uptodate) { + set_buffer_uptodate(bh); + set_bitmap_uptodate(bh); + } + unlock_buffer(bh); + put_bh(bh); +} + /* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. @@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return bh; } /* - * submit the buffer_head for read. We can - * safely mark the bitmap as uptodate now. - * We do it here so the bitmap uptodate bit - * get set with buffer lock held. + * submit the buffer_head for reading */ trace_ext4_load_inode_bitmap(sb, block_group); - set_bitmap_uptodate(bh); - if (bh_submit_read(bh) < 0) { + bh->b_end_io = ext4_end_bitmap_read; + get_bh(bh); + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { put_bh(bh); ext4_error(sb, "Cannot read inode bitmap - " - "block_group = %u, inode_bitmap = %llu", - block_group, bitmap_blk); + "block_group = %u, inode_bitmap = %llu", + block_group, bitmap_blk); return NULL; } return bh; @@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; - if (atomic_read(&inode->i_count) > 1) { - printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + if (!sb) { + printk(KERN_ERR "EXT4-fs: %s:%d: inode on " + "nonexistent device\n", __func__, __LINE__); return; } - if (inode->i_nlink) { - printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", - inode->i_nlink); + if (atomic_read(&inode->i_count) > 1) { + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", + __func__, __LINE__, inode->i_ino, + atomic_read(&inode->i_count)); return; } - if (!sb) { - printk(KERN_ERR "ext4_free_inode: inode on " - "nonexistent device\n"); + if (inode->i_nlink) { + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", + __func__, __LINE__, inode->i_ino, inode->i_nlink); return; } sbi = EXT4_SB(sb); @@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent, } /* - * claim the inode from the inode bitmap. If the group - * is uninit we need to take the groups's ext4_group_lock - * and clear the uninit flag. The inode bitmap update - * and group desc uninit flag clear should be done - * after holding ext4_group_lock so that ext4_read_inode_bitmap - * doesn't race with the ext4_claim_inode - */ -static int ext4_claim_inode(struct super_block *sb, - struct buffer_head *inode_bitmap_bh, - unsigned long ino, ext4_group_t group, umode_t mode) -{ - int free = 0, retval = 0, count; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_group_info *grp = ext4_get_group_info(sb, group); - struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); - - /* - * We have to be sure that new inode allocation does not race with - * inode table initialization, because otherwise we may end up - * allocating and writing new inode right before sb_issue_zeroout - * takes place and overwriting our new inode with zeroes. So we - * take alloc_sem to prevent it. - */ - down_read(&grp->alloc_sem); - ext4_lock_group(sb, group); - if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { - /* not a free inode */ - retval = 1; - goto err_ret; - } - ino++; - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || - ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_unlock_group(sb, group); - up_read(&grp->alloc_sem); - ext4_error(sb, "reserved inode or inode > inodes count - " - "block_group = %u, inode=%lu", group, - ino + group * EXT4_INODES_PER_GROUP(sb)); - return 1; - } - /* If we didn't allocate from within the initialized part of the inode - * table then we need to initialize up to this inode. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); - /* When marking the block group with - * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unused even though - * mke2fs could have initialized the same for us. - * Instead we calculated the value below - */ - - free = 0; - } else { - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); - } - - /* - * Check the relative inode number against the last used - * relative inode number in this group. if it is greater - * we need to update the bg_itable_unused count - * - */ - if (ino > free) - ext4_itable_unused_set(sb, gdp, - (EXT4_INODES_PER_GROUP(sb) - ino)); - } - count = ext4_free_inodes_count(sb, gdp) - 1; - ext4_free_inodes_set(sb, gdp, count); - if (S_ISDIR(mode)) { - count = ext4_used_dirs_count(sb, gdp) + 1; - ext4_used_dirs_set(sb, gdp, count); - if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, group); - - atomic_inc(&sbi->s_flex_groups[f].used_dirs); - } - } - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); -err_ret: - ext4_unlock_group(sb, group); - up_read(&grp->alloc_sem); - return retval; -} - -/* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of @@ -741,6 +664,11 @@ got_group: if (ret2 == -1) goto out; + /* + * Normally we will only go through one pass of this loop, + * unless we get unlucky and it turns out the group we selected + * had its last inode grabbed by someone else. + */ for (i = 0; i < ngroups; i++, ino = 0) { err = -EIO; @@ -757,51 +685,24 @@ repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - - if (ino < EXT4_INODES_PER_GROUP(sb)) { - - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, - inode_bitmap_bh); - if (err) - goto fail; - - BUFFER_TRACE(group_desc_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, - group_desc_bh); - if (err) - goto fail; - if (!ext4_claim_inode(sb, inode_bitmap_bh, - ino, group, mode)) { - /* we won it */ - BUFFER_TRACE(inode_bitmap_bh, - "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, - NULL, - inode_bitmap_bh); - if (err) - goto fail; - /* zero bit is inode number 1*/ - ino++; - goto got; - } - /* we lost it */ - ext4_handle_release_buffer(handle, inode_bitmap_bh); - ext4_handle_release_buffer(handle, group_desc_bh); - - if (++ino < EXT4_INODES_PER_GROUP(sb)) - goto repeat_in_this_group; + if (ino >= EXT4_INODES_PER_GROUP(sb)) { + if (++group == ngroups) + group = 0; + continue; } - - /* - * This case is possible in concurrent environment. It is very - * rare. We cannot repeat the find_group_xxx() call because - * that will simply return the same blockgroup, because the - * group descriptor metadata has not yet been updated. - * So we just go onto the next blockgroup. - */ - if (++group == ngroups) - group = 0; + if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { + ext4_error(sb, "reserved inode found cleared - " + "inode=%lu", ino + 1); + continue; + } + ext4_lock_group(sb, group); + ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); + ext4_unlock_group(sb, group); + ino++; /* the inode bitmap is zero-based */ + if (!ret2) + goto got; /* we grabbed the inode! */ + if (ino < EXT4_INODES_PER_GROUP(sb)) + goto repeat_in_this_group; } err = -ENOSPC; goto out; @@ -838,6 +739,59 @@ got: if (err) goto fail; } + + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); + if (err) + goto fail; + + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, group_desc_bh); + if (err) + goto fail; + + /* Update the relevant bg descriptor fields */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int free; + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ + ext4_lock_group(sb, group); /* while we modify the bg desc */ + free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + free = 0; + } + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + */ + if (ino > free) + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); + up_read(&grp->alloc_sem); + } + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); + if (S_ISDIR(mode)) { + ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + + atomic_inc(&sbi->s_flex_groups[f].used_dirs); + } + } + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_unlock_group(sb, group); + } + + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); + if (err) + goto fail; + BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) @@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) * where it is called from on active part of filesystem is ext4lazyinit * thread, so we do not need any special locks, however we have to prevent * inode allocation from the current group, so we take alloc_sem lock, to - * block ext4_claim_inode until we are finished. + * block ext4_new_inode() until we are finished. */ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier) @@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, sbi->s_inodes_per_block); if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { - ext4_error(sb, "Something is wrong with group %u\n" - "Used itable blocks: %d" - "itable unused count: %u\n", + ext4_error(sb, "Something is wrong with group %u: " + "used itable blocks: %d; " + "itable unused count: %u", group, used_blks, ext4_itable_unused_count(sb, gdp)); ret = 1; |