diff options
Diffstat (limited to 'fs/ocfs2/suballoc.c')
| -rw-r--r-- | fs/ocfs2/suballoc.c | 515 |
1 files changed, 351 insertions, 164 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95a353..0cb889a17ae 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -29,7 +29,6 @@ #include <linux/slab.h> #include <linux/highmem.h> -#define MLOG_MASK_PREFIX ML_DISK_ALLOC #include <cluster/masklog.h> #include "ocfs2.h" @@ -44,6 +43,7 @@ #include "super.h" #include "sysfile.h" #include "uptodate.h" +#include "ocfs2_trace.h" #include "buffer_head_io.h" @@ -57,11 +57,28 @@ struct ocfs2_suballoc_result { u64 sr_bg_blkno; /* The bg we allocated from. Set to 0 when a block group is contiguous. */ + u64 sr_bg_stable_blkno; /* + * Doesn't change, always + * set to target block + * group descriptor + * block. + */ u64 sr_blkno; /* The first allocated block */ unsigned int sr_bit_offset; /* The bit in the bg */ unsigned int sr_bits; /* How many bits we claimed */ }; +static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) +{ + if (res->sr_blkno == 0) + return 0; + + if (res->sr_bg_blkno) + return res->sr_bg_blkno; + + return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); @@ -96,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, struct ocfs2_suballoc_result *res); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); -static inline int ocfs2_block_group_set_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits); static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, @@ -138,6 +149,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) brelse(ac->ac_bh); ac->ac_bh = NULL; ac->ac_resv = NULL; + if (ac->ac_find_loc_priv) { + kfree(ac->ac_find_loc_priv); + ac->ac_find_loc_priv = NULL; + } } void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) @@ -287,8 +302,8 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb, int rc; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; - mlog(0, "Validating group descriptor %llu\n", - (unsigned long long)bh->b_blocknr); + trace_ocfs2_validate_group_descriptor( + (unsigned long long)bh->b_blocknr); BUG_ON(!buffer_uptodate(bh)); @@ -336,7 +351,7 @@ out: static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, struct ocfs2_group_desc *bg, struct ocfs2_chain_list *cl, - u64 p_blkno, u32 clusters) + u64 p_blkno, unsigned int clusters) { struct ocfs2_extent_list *el = &bg->bg_list; struct ocfs2_extent_rec *rec; @@ -348,7 +363,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, rec->e_blkno = cpu_to_le64(p_blkno); rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc)); - rec->e_leaf_clusters = cpu_to_le32(clusters); + rec->e_leaf_clusters = cpu_to_le16(clusters); le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); le16_add_cpu(&bg->bg_free_bits_count, clusters * le16_to_cpu(cl->cl_bpc)); @@ -368,8 +383,6 @@ static int ocfs2_block_group_fill(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct super_block * sb = alloc_inode->i_sb; - mlog_entry_void(); - if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " "b_blocknr (%llu)", @@ -415,7 +428,8 @@ static int ocfs2_block_group_fill(handle_t *handle, * allocation time. */ bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -456,12 +470,12 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle, /* setup the group */ bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "new descriptor, record %u, at block %llu\n", - alloc_rec, (unsigned long long)bg_blkno); + trace_ocfs2_block_group_alloc_contig( + (unsigned long long)bg_blkno, alloc_rec); bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } @@ -580,7 +594,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle, ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode, cluster_ac->ac_bh, le64_to_cpu(rec->e_blkno), - le32_to_cpu(rec->e_leaf_clusters)); + le16_to_cpu(rec->e_leaf_clusters)); if (ret) mlog_errno(ret); /* Try all the clusters to free */ @@ -622,7 +636,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -636,12 +650,12 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, /* setup the group */ bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "new descriptor, record %u, at block %llu\n", - alloc_rec, (unsigned long long)bg_blkno); + trace_ocfs2_block_group_alloc_discontig( + (unsigned long long)bg_blkno, alloc_rec); bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } @@ -686,8 +700,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); - mlog_entry_void(); - cl = &fe->id2.i_chain; status = ocfs2_reserve_clusters_with_limit(osb, le16_to_cpu(cl->cl_cpg), @@ -709,8 +721,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, } if (last_alloc_group && *last_alloc_group != 0) { - mlog(0, "use old allocation group %llu for block group alloc\n", - (unsigned long long)*last_alloc_group); + trace_ocfs2_block_group_alloc( + (unsigned long long)*last_alloc_group); ac->ac_last_group = *last_alloc_group; } @@ -759,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); + ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); status = 0; @@ -775,7 +788,8 @@ bail: brelse(bg_bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -793,8 +807,6 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_dinode *fe; u32 free_bits; - mlog_entry_void(); - alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); if (!alloc_inode) { mlog_errno(-EINVAL); @@ -834,16 +846,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, if (bits_wanted > free_bits) { /* cluster bitmap never grows */ if (ocfs2_is_cluster_bitmap(alloc_inode)) { - mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", - bits_wanted, free_bits); + trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted, + free_bits); status = -ENOSPC; goto bail; } if (!(flags & ALLOC_NEW_GROUP)) { - mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " - "and we don't alloc a new group for it.\n", - slot, bits_wanted, free_bits); + trace_ocfs2_reserve_suballoc_bits_no_new_group( + slot, bits_wanted, free_bits); status = -ENOSPC; goto bail; } @@ -869,7 +880,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, bail: brelse(bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1031,7 +1043,8 @@ bail: *ac = NULL; } - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1098,8 +1111,8 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, spin_lock(&osb->osb_lock); osb->osb_inode_alloc_group = alloc_group; spin_unlock(&osb->osb_lock); - mlog(0, "after reservation, new allocation group is " - "%llu\n", (unsigned long long)alloc_group); + trace_ocfs2_reserve_new_inode_new_group( + (unsigned long long)alloc_group); /* * Some inodes must be freed by us, so try to allocate @@ -1131,7 +1144,8 @@ bail: *ac = NULL; } - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1168,8 +1182,6 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, { int status; - mlog_entry_void(); - *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; @@ -1208,7 +1220,8 @@ bail: *ac = NULL; } - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1325,7 +1338,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, return status; } -static inline int ocfs2_block_group_set_bits(handle_t *handle, +int ocfs2_block_group_set_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, @@ -1336,15 +1349,12 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, void *bitmap = bg->bg_bitmap; int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; - mlog_entry_void(); - /* All callers get the descriptor via * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); - mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, - num_bits); + trace_ocfs2_block_group_set_bits(bit_off, num_bits); if (ocfs2_is_cluster_bitmap(alloc_inode)) journal_type = OCFS2_JOURNAL_ACCESS_UNDO; @@ -1359,13 +1369,20 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, } le16_add_cpu(&bg->bg_free_bits_count, -num_bits); + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" + " count %u but claims %u are freed. num_bits %d", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), + le16_to_cpu(bg->bg_free_bits_count), num_bits); + return -EROFS; + } while(num_bits--) ocfs2_set_bit(bit_off++, bitmap); ocfs2_journal_dirty(handle, group_bh); bail: - mlog_exit(status); return status; } @@ -1398,7 +1415,7 @@ static int ocfs2_relink_block_group(handle_t *handle, int status; /* there is a really tiny chance the journal calls could fail, * but we wouldn't want inconsistent blocks in *any* case. */ - u64 fe_ptr, bg_ptr, prev_bg_ptr; + u64 bg_ptr, prev_bg_ptr; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; @@ -1408,55 +1425,49 @@ static int ocfs2_relink_block_group(handle_t *handle, BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); - mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", - (unsigned long long)le64_to_cpu(fe->i_blkno), chain, - (unsigned long long)le64_to_cpu(bg->bg_blkno), - (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); + trace_ocfs2_relink_block_group( + (unsigned long long)le64_to_cpu(fe->i_blkno), chain, + (unsigned long long)le64_to_cpu(bg->bg_blkno), + (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); - fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), prev_bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out; prev_bg->bg_next_group = bg->bg_next_group; ocfs2_journal_dirty(handle, prev_bg_bh); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out_rollback_prev_bg; bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; ocfs2_journal_dirty(handle, bg_bh); status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out_rollback_bg; fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; ocfs2_journal_dirty(handle, fe_bh); -out_rollback: - if (status < 0) { - fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); - bg->bg_next_group = cpu_to_le64(bg_ptr); - prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); - } - - mlog_exit(status); +out: + if (status < 0) + mlog_errno(status); return status; + +out_rollback_bg: + bg->bg_next_group = cpu_to_le64(bg_ptr); +out_rollback_prev_bg: + prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); + goto out; } static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, @@ -1486,7 +1497,7 @@ static int ocfs2_cluster_group_search(struct inode *inode, max_bits = le16_to_cpu(gd->bg_bits); /* Tail groups in cluster bitmaps which aren't cpg - * aligned are prone to partial extention by a failed + * aligned are prone to partial extension by a failed * fs resize. If the file system resize never got to * update the dinode cluster count, then we don't want * to trust any clusters past it, regardless of what @@ -1496,10 +1507,10 @@ static int ocfs2_cluster_group_search(struct inode *inode, if ((gd_cluster_off + max_bits) > OCFS2_I(inode)->ip_clusters) { max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; - mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - OCFS2_I(inode)->ip_clusters, max_bits); + trace_ocfs2_cluster_group_search_wrong_max_bits( + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + OCFS2_I(inode)->ip_clusters, max_bits); } ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), @@ -1513,9 +1524,9 @@ static int ocfs2_cluster_group_search(struct inode *inode, gd_cluster_off + res->sr_bit_offset + res->sr_bits); - mlog(0, "Checking %llu against %llu\n", - (unsigned long long)blkoff, - (unsigned long long)max_block); + trace_ocfs2_cluster_group_search_max_block( + (unsigned long long)blkoff, + (unsigned long long)max_block); if (blkoff > max_block) return -ENOSPC; } @@ -1559,9 +1570,9 @@ static int ocfs2_block_group_search(struct inode *inode, if (!ret && max_block) { blkoff = le64_to_cpu(bg->bg_blkno) + res->sr_bit_offset + res->sr_bits; - mlog(0, "Checking %llu against %llu\n", - (unsigned long long)blkoff, - (unsigned long long)max_block); + trace_ocfs2_block_group_search_max_block( + (unsigned long long)blkoff, + (unsigned long long)max_block); if (blkoff > max_block) ret = -ENOSPC; } @@ -1570,7 +1581,7 @@ static int ocfs2_block_group_search(struct inode *inode, return ret; } -static int ocfs2_alloc_dinode_update_counts(struct inode *inode, +int ocfs2_alloc_dinode_update_counts(struct inode *inode, handle_t *handle, struct buffer_head *di_bh, u32 num_bits, @@ -1597,13 +1608,28 @@ out: return ret; } +void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl; + + cl = (struct ocfs2_chain_list *)&di->id2.i_chain; + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits); + le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits); +} + static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, struct ocfs2_extent_rec *rec, struct ocfs2_chain_list *cl) { unsigned int bpc = le16_to_cpu(cl->cl_bpc); unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc; - unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc; + unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc; if (res->sr_bit_offset < bitoff) return 0; @@ -1678,6 +1704,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, if (!ret) ocfs2_bg_discontig_fix_result(ac, gd, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; + + if (ac->ac_find_loc_only) + goto out_loc_only; + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, res->sr_bits, le16_to_cpu(gd->bg_chain)); @@ -1688,9 +1723,14 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, res->sr_bit_offset, res->sr_bits); - if (ret < 0) + if (ret < 0) { + ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh, + res->sr_bits, + le16_to_cpu(gd->bg_chain)); mlog_errno(ret); + } +out_loc_only: *bits_left = le16_to_cpu(gd->bg_free_bits_count); out: @@ -1708,7 +1748,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; - u32 tmp_used; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1718,9 +1757,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, struct ocfs2_group_desc *bg; chain = ac->ac_chain; - mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", - bits_wanted, chain, - (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); + trace_ocfs2_search_chain_begin( + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, + bits_wanted, chain); status = ocfs2_read_group_descriptor(alloc_inode, fe, le64_to_cpu(cl->cl_recs[chain].c_blkno), @@ -1761,8 +1800,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } - mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", - res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); + trace_ocfs2_search_chain_succ( + (unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits); res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno); @@ -1770,6 +1809,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, if (!status) ocfs2_bg_discontig_fix_result(ac, bg, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; /* * Keep track of previous block descriptor read. When @@ -1784,7 +1828,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1796,22 +1840,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } } - /* Ok, claim our bits now: set the info on dinode, chainlist - * and then the group */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(alloc_inode), - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { + if (ac->ac_find_loc_only) + goto out_loc_only; + + status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (status) { mlog_errno(status); goto bail; } - tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); - fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); - le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); - ocfs2_journal_dirty(handle, ac->ac_bh); - status = ocfs2_block_group_set_bits(handle, alloc_inode, bg, @@ -1819,19 +1858,24 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, res->sr_bit_offset, res->sr_bits); if (status < 0) { + ocfs2_rollback_alloc_dinode_counts(alloc_inode, + ac->ac_bh, res->sr_bits, chain); mlog_errno(status); goto bail; } - mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, - (unsigned long long)le64_to_cpu(fe->i_blkno)); + trace_ocfs2_search_chain_end( + (unsigned long long)le64_to_cpu(fe->i_blkno), + res->sr_bits); +out_loc_only: *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: brelse(group_bh); brelse(prev_group_bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1845,11 +1889,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, int status; u16 victim, i; u16 bits_left = 0; + u64 hint = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; - mlog_entry_void(); - BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); BUG_ON(!ac->ac_bh); @@ -1872,11 +1915,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } - res->sr_bg_blkno = ac->ac_last_group; + res->sr_bg_blkno = hint; if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism * by jumping straight to the most recently used - * allocation group. This helps us mantain some + * allocation group. This helps us maintain some * contiguousness across allocations. */ status = ocfs2_search_one_group(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1892,25 +1935,25 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); goto set_hint; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; } - mlog(0, "Search of victim chain %u came up with nothing, " - "trying all chains now.\n", victim); + trace_ocfs2_claim_suballoc_bits(victim); /* If we didn't pick a good victim, then just default to * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; @@ -1920,8 +1963,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, ac->ac_chain = i; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); break; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1936,11 +1981,12 @@ set_hint: if (bits_left < min_bits) ac->ac_last_group = 0; else - ac->ac_last_group = res->sr_bg_blkno; + ac->ac_last_group = hint; } bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1977,7 +2023,8 @@ int ocfs2_claim_metadata(handle_t *handle, *num_bits = res.sr_bits; status = 0; bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2016,6 +2063,138 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; } +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno) +{ + int ret; + handle_t *handle = NULL; + struct ocfs2_suballoc_result *res; + + BUG_ON(!ac); + BUG_ON(ac->ac_bits_given != 0); + BUG_ON(ac->ac_bits_wanted != 1); + BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); + + res = kzalloc(sizeof(*res), GFP_NOFS); + if (res == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); + + /* + * The handle started here is for chain relink. Alternatively, + * we could just disable relink for these calls. + */ + handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + mlog_errno(ret); + goto out; + } + + /* + * This will instruct ocfs2_claim_suballoc_bits and + * ocfs2_search_one_group to search but save actual allocation + * for later. + */ + ac->ac_find_loc_only = 1; + + ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ac->ac_find_loc_priv = res; + *fe_blkno = res->sr_blkno; + ocfs2_update_inode_fsync_trans(handle, dir, 0); +out: + if (handle) + ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); + + if (ret) + kfree(res); + + return ret; +} + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno) +{ + int ret; + u16 chain; + struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; + struct buffer_head *bg_bh = NULL; + struct ocfs2_group_desc *bg; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; + + /* + * Since di_blkno is being passed back in, we check for any + * inconsistencies which may have happened between + * calls. These are code bugs as di_blkno is not expected to + * change once returned from ocfs2_find_new_inode_loc() + */ + BUG_ON(res->sr_blkno != di_blkno); + + ret = ocfs2_read_group_descriptor(ac->ac_inode, di, + res->sr_bg_stable_blkno, &bg_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + bg = (struct ocfs2_group_desc *) bg_bh->b_data; + chain = le16_to_cpu(bg->bg_chain); + + ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, + ac->ac_inode, + bg, + bg_bh, + res->sr_bit_offset, + res->sr_bits); + if (ret < 0) { + ocfs2_rollback_alloc_dinode_counts(ac->ac_inode, + ac->ac_bh, res->sr_bits, chain); + mlog_errno(ret); + goto out; + } + + trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno, + res->sr_bits); + + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); + + BUG_ON(res->sr_bits != 1); + + *suballoc_loc = res->sr_bg_blkno; + *suballoc_bit = res->sr_bit_offset; + ac->ac_bits_given++; + ocfs2_save_inode_ac_group(dir, ac); + +out: + brelse(bg_bh); + + return ret; +} + int ocfs2_claim_new_inode(handle_t *handle, struct inode *dir, struct buffer_head *parent_fe_bh, @@ -2027,8 +2206,6 @@ int ocfs2_claim_new_inode(handle_t *handle, int status; struct ocfs2_suballoc_result res; - mlog_entry_void(); - BUG_ON(!ac); BUG_ON(ac->ac_bits_given != 0); BUG_ON(ac->ac_bits_wanted != 1); @@ -2056,7 +2233,8 @@ int ocfs2_claim_new_inode(handle_t *handle, ocfs2_save_inode_ac_group(dir, ac); status = 0; bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2133,8 +2311,6 @@ int __ocfs2_claim_clusters(handle_t *handle, struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb); - mlog_entry_void(); - BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL @@ -2189,7 +2365,8 @@ int __ocfs2_claim_clusters(handle_t *handle, ac->ac_bits_given += *num_clusters; bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2218,13 +2395,11 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, unsigned int tmp; struct ocfs2_group_desc *undo_bg = NULL; - mlog_entry_void(); - /* The caller got this descriptor from * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); - mlog(0, "off = %u, num = %u\n", bit_off, num_bits); + trace_ocfs2_block_group_clear_bits(bit_off, num_bits); BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), @@ -2253,6 +2428,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" + " count %u but claims %u are freed. num_bits %d", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), + le16_to_cpu(bg->bg_free_bits_count), num_bits); + return -EROFS; + } if (undo_fn) jbd_unlock_bh_state(group_bh); @@ -2281,19 +2464,18 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group; - mlog_entry_void(); - /* The alloc_bh comes from ocfs2_free_dinode() or * ocfs2_free_clusters(). The callers have all locked the * allocator and gotten alloc_bh from the lock call. This - * validates the dinode buffer. Any corruption that has happended + * validates the dinode buffer. Any corruption that has happened * is a code bug. */ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); - mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", - (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, - (unsigned long long)bg_blkno, start_bit); + trace_ocfs2_free_suballoc_bits( + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, + (unsigned long long)bg_blkno, + start_bit, count); status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, &group_bh); @@ -2329,7 +2511,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, bail: brelse(group_bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2374,11 +2557,8 @@ static int _ocfs2_free_clusters(handle_t *handle, /* You can't ever have a contiguous set of clusters * bigger than a block group bitmap so we never have to worry - * about looping on them. */ - - mlog_entry_void(); - - /* This is expensive. We can safely remove once this stuff has + * about looping on them. + * This is expensive. We can safely remove once this stuff has * gotten tested really well. */ BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); @@ -2387,10 +2567,9 @@ static int _ocfs2_free_clusters(handle_t *handle, ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, &bg_start_bit); - mlog(0, "want to free %u clusters starting at block %llu\n", - num_clusters, (unsigned long long)start_blk); - mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", - (unsigned long long)bg_blkno, bg_start_bit); + trace_ocfs2_free_clusters((unsigned long long)bg_blkno, + (unsigned long long)start_blk, + bg_start_bit, num_clusters); status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, bg_start_bit, bg_blkno, @@ -2404,7 +2583,8 @@ static int _ocfs2_free_clusters(handle_t *handle, num_clusters); out: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2567,13 +2747,14 @@ out: * suballoc_bit. */ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, - u16 *suballoc_slot, u16 *suballoc_bit) + u16 *suballoc_slot, u64 *group_blkno, + u16 *suballoc_bit) { int status; struct buffer_head *inode_bh = NULL; struct ocfs2_dinode *inode_fe; - mlog_entry("blkno: %llu\n", (unsigned long long)blkno); + trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno); /* dirty read disk */ status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); @@ -2604,11 +2785,14 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); if (suballoc_bit) *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); + if (group_blkno) + *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); bail: brelse(inode_bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2621,7 +2805,8 @@ bail: */ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, - struct buffer_head *alloc_bh, u64 blkno, + struct buffer_head *alloc_bh, + u64 group_blkno, u64 blkno, u16 bit, int *res) { struct ocfs2_dinode *alloc_di; @@ -2630,8 +2815,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, u64 bg_blkno; int status; - mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, - (unsigned int)bit); + trace_ocfs2_test_suballoc_bit((unsigned long long)blkno, + (unsigned int)bit); alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data; if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) { @@ -2642,10 +2827,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, goto bail; } - if (alloc_di->i_suballoc_loc) - bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); - else - bg_blkno = ocfs2_which_suballoc_group(blkno, bit); + bg_blkno = group_blkno ? group_blkno : + ocfs2_which_suballoc_group(blkno, bit); status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, &group_bh); if (status < 0) { @@ -2660,7 +2843,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, bail: brelse(group_bh); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2680,14 +2864,15 @@ bail: int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { int status; + u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; - mlog_entry("blkno: %llu", (unsigned long long)blkno); + trace_ocfs2_test_inode_bit((unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, - &suballoc_bit); + &group_blkno, &suballoc_bit); if (status < 0) { mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); goto bail; @@ -2709,13 +2894,14 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); if (status < 0) { mutex_unlock(&inode_alloc_inode->i_mutex); + iput(inode_alloc_inode); mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", (u32)suballoc_slot, status); goto bail; } status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, - blkno, suballoc_bit, res); + group_blkno, blkno, suballoc_bit, res); if (status < 0) mlog(ML_ERROR, "test suballoc bit failed %d\n", status); @@ -2725,6 +2911,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) iput(inode_alloc_inode); brelse(alloc_bh); bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } |
