aboutsummaryrefslogtreecommitdiff
path: root/fs/ocfs2/suballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/suballoc.c')
-rw-r--r--fs/ocfs2/suballoc.c515
1 files changed, 351 insertions, 164 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a8e6a95a353..0cb889a17ae 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
-#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
#include "ocfs2.h"
@@ -44,6 +43,7 @@
#include "super.h"
#include "sysfile.h"
#include "uptodate.h"
+#include "ocfs2_trace.h"
#include "buffer_head_io.h"
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result {
u64 sr_bg_blkno; /* The bg we allocated from. Set
to 0 when a block group is
contiguous. */
+ u64 sr_bg_stable_blkno; /*
+ * Doesn't change, always
+ * set to target block
+ * group descriptor
+ * block.
+ */
u64 sr_blkno; /* The first allocated block */
unsigned int sr_bit_offset; /* The bit in the bg */
unsigned int sr_bits; /* How many bits we claimed */
};
+static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
+{
+ if (res->sr_blkno == 0)
+ return 0;
+
+ if (res->sr_bg_blkno)
+ return res->sr_bg_blkno;
+
+ return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
+}
+
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -96,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
struct ocfs2_suballoc_result *res);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr);
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits);
static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
@@ -138,6 +149,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
brelse(ac->ac_bh);
ac->ac_bh = NULL;
ac->ac_resv = NULL;
+ if (ac->ac_find_loc_priv) {
+ kfree(ac->ac_find_loc_priv);
+ ac->ac_find_loc_priv = NULL;
+ }
}
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -287,8 +302,8 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb,
int rc;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
- mlog(0, "Validating group descriptor %llu\n",
- (unsigned long long)bh->b_blocknr);
+ trace_ocfs2_validate_group_descriptor(
+ (unsigned long long)bh->b_blocknr);
BUG_ON(!buffer_uptodate(bh));
@@ -336,7 +351,7 @@ out:
static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
struct ocfs2_group_desc *bg,
struct ocfs2_chain_list *cl,
- u64 p_blkno, u32 clusters)
+ u64 p_blkno, unsigned int clusters)
{
struct ocfs2_extent_list *el = &bg->bg_list;
struct ocfs2_extent_rec *rec;
@@ -348,7 +363,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
rec->e_blkno = cpu_to_le64(p_blkno);
rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
le16_to_cpu(cl->cl_bpc));
- rec->e_leaf_clusters = cpu_to_le32(clusters);
+ rec->e_leaf_clusters = cpu_to_le16(clusters);
le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
le16_add_cpu(&bg->bg_free_bits_count,
clusters * le16_to_cpu(cl->cl_bpc));
@@ -368,8 +383,6 @@ static int ocfs2_block_group_fill(handle_t *handle,
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct super_block * sb = alloc_inode->i_sb;
- mlog_entry_void();
-
if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
"b_blocknr (%llu)",
@@ -415,7 +428,8 @@ static int ocfs2_block_group_fill(handle_t *handle,
* allocation time. */
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -456,12 +470,12 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
/* setup the group */
bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
- mlog(0, "new descriptor, record %u, at block %llu\n",
- alloc_rec, (unsigned long long)bg_blkno);
+ trace_ocfs2_block_group_alloc_contig(
+ (unsigned long long)bg_blkno, alloc_rec);
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -580,7 +594,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle,
ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
cluster_ac->ac_bh,
le64_to_cpu(rec->e_blkno),
- le32_to_cpu(rec->e_leaf_clusters));
+ le16_to_cpu(rec->e_leaf_clusters));
if (ret)
mlog_errno(ret);
/* Try all the clusters to free */
@@ -622,7 +636,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
* cluster groups will be staying in cache for the duration of
* this operation.
*/
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
/* Claim the first region */
status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
@@ -636,12 +650,12 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
/* setup the group */
bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
- mlog(0, "new descriptor, record %u, at block %llu\n",
- alloc_rec, (unsigned long long)bg_blkno);
+ trace_ocfs2_block_group_alloc_discontig(
+ (unsigned long long)bg_blkno, alloc_rec);
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -EIO;
+ status = -ENOMEM;
mlog_errno(status);
goto bail;
}
@@ -686,8 +700,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
- mlog_entry_void();
-
cl = &fe->id2.i_chain;
status = ocfs2_reserve_clusters_with_limit(osb,
le16_to_cpu(cl->cl_cpg),
@@ -709,8 +721,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
}
if (last_alloc_group && *last_alloc_group != 0) {
- mlog(0, "use old allocation group %llu for block group alloc\n",
- (unsigned long long)*last_alloc_group);
+ trace_ocfs2_block_group_alloc(
+ (unsigned long long)*last_alloc_group);
ac->ac_last_group = *last_alloc_group;
}
@@ -759,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
+ ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0);
status = 0;
@@ -775,7 +788,8 @@ bail:
brelse(bg_bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -793,8 +807,6 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_dinode *fe;
u32 free_bits;
- mlog_entry_void();
-
alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
if (!alloc_inode) {
mlog_errno(-EINVAL);
@@ -834,16 +846,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
if (bits_wanted > free_bits) {
/* cluster bitmap never grows */
if (ocfs2_is_cluster_bitmap(alloc_inode)) {
- mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
- bits_wanted, free_bits);
+ trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted,
+ free_bits);
status = -ENOSPC;
goto bail;
}
if (!(flags & ALLOC_NEW_GROUP)) {
- mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
- "and we don't alloc a new group for it.\n",
- slot, bits_wanted, free_bits);
+ trace_ocfs2_reserve_suballoc_bits_no_new_group(
+ slot, bits_wanted, free_bits);
status = -ENOSPC;
goto bail;
}
@@ -869,7 +880,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
bail:
brelse(bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1031,7 +1043,8 @@ bail:
*ac = NULL;
}
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1098,8 +1111,8 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
spin_lock(&osb->osb_lock);
osb->osb_inode_alloc_group = alloc_group;
spin_unlock(&osb->osb_lock);
- mlog(0, "after reservation, new allocation group is "
- "%llu\n", (unsigned long long)alloc_group);
+ trace_ocfs2_reserve_new_inode_new_group(
+ (unsigned long long)alloc_group);
/*
* Some inodes must be freed by us, so try to allocate
@@ -1131,7 +1144,8 @@ bail:
*ac = NULL;
}
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1168,8 +1182,6 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
{
int status;
- mlog_entry_void();
-
*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
status = -ENOMEM;
@@ -1208,7 +1220,8 @@ bail:
*ac = NULL;
}
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1325,7 +1338,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
return status;
}
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
+int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
@@ -1336,15 +1349,12 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
void *bitmap = bg->bg_bitmap;
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
- mlog_entry_void();
-
/* All callers get the descriptor via
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
- mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
- num_bits);
+ trace_ocfs2_block_group_set_bits(bit_off, num_bits);
if (ocfs2_is_cluster_bitmap(alloc_inode))
journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
@@ -1359,13 +1369,20 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
}
le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
+ if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
+ ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
+ " count %u but claims %u are freed. num_bits %d",
+ (unsigned long long)le64_to_cpu(bg->bg_blkno),
+ le16_to_cpu(bg->bg_bits),
+ le16_to_cpu(bg->bg_free_bits_count), num_bits);
+ return -EROFS;
+ }
while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap);
ocfs2_journal_dirty(handle, group_bh);
bail:
- mlog_exit(status);
return status;
}
@@ -1398,7 +1415,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
int status;
/* there is a really tiny chance the journal calls could fail,
* but we wouldn't want inconsistent blocks in *any* case. */
- u64 fe_ptr, bg_ptr, prev_bg_ptr;
+ u64 bg_ptr, prev_bg_ptr;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
@@ -1408,55 +1425,49 @@ static int ocfs2_relink_block_group(handle_t *handle,
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
- mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
- (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
- (unsigned long long)le64_to_cpu(bg->bg_blkno),
- (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
+ trace_ocfs2_relink_block_group(
+ (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
+ (unsigned long long)le64_to_cpu(bg->bg_blkno),
+ (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
- fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
bg_ptr = le64_to_cpu(bg->bg_next_group);
prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
prev_bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out;
prev_bg->bg_next_group = bg->bg_next_group;
ocfs2_journal_dirty(handle, prev_bg_bh);
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out_rollback_prev_bg;
bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
ocfs2_journal_dirty(handle, bg_bh);
status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out_rollback_bg;
fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
ocfs2_journal_dirty(handle, fe_bh);
-out_rollback:
- if (status < 0) {
- fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
- bg->bg_next_group = cpu_to_le64(bg_ptr);
- prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
- }
-
- mlog_exit(status);
+out:
+ if (status < 0)
+ mlog_errno(status);
return status;
+
+out_rollback_bg:
+ bg->bg_next_group = cpu_to_le64(bg_ptr);
+out_rollback_prev_bg:
+ prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
+ goto out;
}
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
@@ -1486,7 +1497,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
max_bits = le16_to_cpu(gd->bg_bits);
/* Tail groups in cluster bitmaps which aren't cpg
- * aligned are prone to partial extention by a failed
+ * aligned are prone to partial extension by a failed
* fs resize. If the file system resize never got to
* update the dinode cluster count, then we don't want
* to trust any clusters past it, regardless of what
@@ -1496,10 +1507,10 @@ static int ocfs2_cluster_group_search(struct inode *inode,
if ((gd_cluster_off + max_bits) >
OCFS2_I(inode)->ip_clusters) {
max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
- mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- OCFS2_I(inode)->ip_clusters, max_bits);
+ trace_ocfs2_cluster_group_search_wrong_max_bits(
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ OCFS2_I(inode)->ip_clusters, max_bits);
}
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
@@ -1513,9 +1524,9 @@ static int ocfs2_cluster_group_search(struct inode *inode,
gd_cluster_off +
res->sr_bit_offset +
res->sr_bits);
- mlog(0, "Checking %llu against %llu\n",
- (unsigned long long)blkoff,
- (unsigned long long)max_block);
+ trace_ocfs2_cluster_group_search_max_block(
+ (unsigned long long)blkoff,
+ (unsigned long long)max_block);
if (blkoff > max_block)
return -ENOSPC;
}
@@ -1559,9 +1570,9 @@ static int ocfs2_block_group_search(struct inode *inode,
if (!ret && max_block) {
blkoff = le64_to_cpu(bg->bg_blkno) +
res->sr_bit_offset + res->sr_bits;
- mlog(0, "Checking %llu against %llu\n",
- (unsigned long long)blkoff,
- (unsigned long long)max_block);
+ trace_ocfs2_block_group_search_max_block(
+ (unsigned long long)blkoff,
+ (unsigned long long)max_block);
if (blkoff > max_block)
ret = -ENOSPC;
}
@@ -1570,7 +1581,7 @@ static int ocfs2_block_group_search(struct inode *inode,
return ret;
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
handle_t *handle,
struct buffer_head *di_bh,
u32 num_bits,
@@ -1597,13 +1608,28 @@ out:
return ret;
}
+void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain)
+{
+ u32 tmp_used;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+ struct ocfs2_chain_list *cl;
+
+ cl = (struct ocfs2_chain_list *)&di->id2.i_chain;
+ tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+ di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits);
+ le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits);
+}
+
static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
struct ocfs2_extent_rec *rec,
struct ocfs2_chain_list *cl)
{
unsigned int bpc = le16_to_cpu(cl->cl_bpc);
unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
- unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
+ unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
if (res->sr_bit_offset < bitoff)
return 0;
@@ -1678,6 +1704,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
if (!ret)
ocfs2_bg_discontig_fix_result(ac, gd, res);
+ /*
+ * sr_bg_blkno might have been changed by
+ * ocfs2_bg_discontig_fix_result
+ */
+ res->sr_bg_stable_blkno = group_bh->b_blocknr;
+
+ if (ac->ac_find_loc_only)
+ goto out_loc_only;
+
ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
res->sr_bits,
le16_to_cpu(gd->bg_chain));
@@ -1688,9 +1723,14 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
res->sr_bit_offset, res->sr_bits);
- if (ret < 0)
+ if (ret < 0) {
+ ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
+ res->sr_bits,
+ le16_to_cpu(gd->bg_chain));
mlog_errno(ret);
+ }
+out_loc_only:
*bits_left = le16_to_cpu(gd->bg_free_bits_count);
out:
@@ -1708,7 +1748,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{
int status;
u16 chain;
- u32 tmp_used;
u64 next_group;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
@@ -1718,9 +1757,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
struct ocfs2_group_desc *bg;
chain = ac->ac_chain;
- mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
- bits_wanted, chain,
- (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
+ trace_ocfs2_search_chain_begin(
+ (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
+ bits_wanted, chain);
status = ocfs2_read_group_descriptor(alloc_inode, fe,
le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -1761,8 +1800,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
goto bail;
}
- mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
- res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
+ trace_ocfs2_search_chain_succ(
+ (unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits);
res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
@@ -1770,6 +1809,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
if (!status)
ocfs2_bg_discontig_fix_result(ac, bg, res);
+ /*
+ * sr_bg_blkno might have been changed by
+ * ocfs2_bg_discontig_fix_result
+ */
+ res->sr_bg_stable_blkno = group_bh->b_blocknr;
/*
* Keep track of previous block descriptor read. When
@@ -1784,7 +1828,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
* Do this *after* figuring out how many bits we're taking out
* of our target group.
*/
- if (ac->ac_allow_chain_relink &&
+ if (!ac->ac_disable_chain_relink &&
(prev_group_bh) &&
(ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
status = ocfs2_relink_block_group(handle, alloc_inode,
@@ -1796,22 +1840,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
}
}
- /* Ok, claim our bits now: set the info on dinode, chainlist
- * and then the group */
- status = ocfs2_journal_access_di(handle,
- INODE_CACHE(alloc_inode),
- ac->ac_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
+ if (ac->ac_find_loc_only)
+ goto out_loc_only;
+
+ status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
+ ac->ac_bh, res->sr_bits,
+ chain);
+ if (status) {
mlog_errno(status);
goto bail;
}
- tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
- fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
- le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
- ocfs2_journal_dirty(handle, ac->ac_bh);
-
status = ocfs2_block_group_set_bits(handle,
alloc_inode,
bg,
@@ -1819,19 +1858,24 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
res->sr_bit_offset,
res->sr_bits);
if (status < 0) {
+ ocfs2_rollback_alloc_dinode_counts(alloc_inode,
+ ac->ac_bh, res->sr_bits, chain);
mlog_errno(status);
goto bail;
}
- mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
- (unsigned long long)le64_to_cpu(fe->i_blkno));
+ trace_ocfs2_search_chain_end(
+ (unsigned long long)le64_to_cpu(fe->i_blkno),
+ res->sr_bits);
+out_loc_only:
*bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
brelse(group_bh);
brelse(prev_group_bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1845,11 +1889,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
int status;
u16 victim, i;
u16 bits_left = 0;
+ u64 hint = ac->ac_last_group;
struct ocfs2_chain_list *cl;
struct ocfs2_dinode *fe;
- mlog_entry_void();
-
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
BUG_ON(!ac->ac_bh);
@@ -1872,11 +1915,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
goto bail;
}
- res->sr_bg_blkno = ac->ac_last_group;
+ res->sr_bg_blkno = hint;
if (res->sr_bg_blkno) {
/* Attempt to short-circuit the usual search mechanism
* by jumping straight to the most recently used
- * allocation group. This helps us mantain some
+ * allocation group. This helps us maintain some
* contiguousness across allocations. */
status = ocfs2_search_one_group(ac, handle, bits_wanted,
min_bits, res, &bits_left);
@@ -1892,25 +1935,25 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
- ac->ac_allow_chain_relink = 1;
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
- if (!status)
+ if (!status) {
+ hint = ocfs2_group_from_res(res);
goto set_hint;
+ }
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
}
- mlog(0, "Search of victim chain %u came up with nothing, "
- "trying all chains now.\n", victim);
+ trace_ocfs2_claim_suballoc_bits(victim);
/* If we didn't pick a good victim, then just default to
* searching each chain in order. Don't allow chain relinking
* because we only calculate enough journal credits for one
* relink per alloc. */
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
@@ -1920,8 +1963,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
ac->ac_chain = i;
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
- if (!status)
+ if (!status) {
+ hint = ocfs2_group_from_res(res);
break;
+ }
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
@@ -1936,11 +1981,12 @@ set_hint:
if (bits_left < min_bits)
ac->ac_last_group = 0;
else
- ac->ac_last_group = res->sr_bg_blkno;
+ ac->ac_last_group = hint;
}
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -1977,7 +2023,8 @@ int ocfs2_claim_metadata(handle_t *handle,
*num_bits = res.sr_bits;
status = 0;
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2016,6 +2063,138 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
}
+int ocfs2_find_new_inode_loc(struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ struct ocfs2_alloc_context *ac,
+ u64 *fe_blkno)
+{
+ int ret;
+ handle_t *handle = NULL;
+ struct ocfs2_suballoc_result *res;
+
+ BUG_ON(!ac);
+ BUG_ON(ac->ac_bits_given != 0);
+ BUG_ON(ac->ac_bits_wanted != 1);
+ BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
+
+ res = kzalloc(sizeof(*res), GFP_NOFS);
+ if (res == NULL) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
+
+ /*
+ * The handle started here is for chain relink. Alternatively,
+ * we could just disable relink for these calls.
+ */
+ handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * This will instruct ocfs2_claim_suballoc_bits and
+ * ocfs2_search_one_group to search but save actual allocation
+ * for later.
+ */
+ ac->ac_find_loc_only = 1;
+
+ ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ac->ac_find_loc_priv = res;
+ *fe_blkno = res->sr_blkno;
+ ocfs2_update_inode_fsync_trans(handle, dir, 0);
+out:
+ if (handle)
+ ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
+
+ if (ret)
+ kfree(res);
+
+ return ret;
+}
+
+int ocfs2_claim_new_inode_at_loc(handle_t *handle,
+ struct inode *dir,
+ struct ocfs2_alloc_context *ac,
+ u64 *suballoc_loc,
+ u16 *suballoc_bit,
+ u64 di_blkno)
+{
+ int ret;
+ u16 chain;
+ struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
+ struct buffer_head *bg_bh = NULL;
+ struct ocfs2_group_desc *bg;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
+
+ /*
+ * Since di_blkno is being passed back in, we check for any
+ * inconsistencies which may have happened between
+ * calls. These are code bugs as di_blkno is not expected to
+ * change once returned from ocfs2_find_new_inode_loc()
+ */
+ BUG_ON(res->sr_blkno != di_blkno);
+
+ ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
+ res->sr_bg_stable_blkno, &bg_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ chain = le16_to_cpu(bg->bg_chain);
+
+ ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
+ ac->ac_bh, res->sr_bits,
+ chain);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_block_group_set_bits(handle,
+ ac->ac_inode,
+ bg,
+ bg_bh,
+ res->sr_bit_offset,
+ res->sr_bits);
+ if (ret < 0) {
+ ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
+ ac->ac_bh, res->sr_bits, chain);
+ mlog_errno(ret);
+ goto out;
+ }
+
+ trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno,
+ res->sr_bits);
+
+ atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
+
+ BUG_ON(res->sr_bits != 1);
+
+ *suballoc_loc = res->sr_bg_blkno;
+ *suballoc_bit = res->sr_bit_offset;
+ ac->ac_bits_given++;
+ ocfs2_save_inode_ac_group(dir, ac);
+
+out:
+ brelse(bg_bh);
+
+ return ret;
+}
+
int ocfs2_claim_new_inode(handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
@@ -2027,8 +2206,6 @@ int ocfs2_claim_new_inode(handle_t *handle,
int status;
struct ocfs2_suballoc_result res;
- mlog_entry_void();
-
BUG_ON(!ac);
BUG_ON(ac->ac_bits_given != 0);
BUG_ON(ac->ac_bits_wanted != 1);
@@ -2056,7 +2233,8 @@ int ocfs2_claim_new_inode(handle_t *handle,
ocfs2_save_inode_ac_group(dir, ac);
status = 0;
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2133,8 +2311,6 @@ int __ocfs2_claim_clusters(handle_t *handle,
struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
- mlog_entry_void();
-
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -2189,7 +2365,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
ac->ac_bits_given += *num_clusters;
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2218,13 +2395,11 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
unsigned int tmp;
struct ocfs2_group_desc *undo_bg = NULL;
- mlog_entry_void();
-
/* The caller got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
- mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
+ trace_ocfs2_block_group_clear_bits(bit_off, num_bits);
BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
@@ -2253,6 +2428,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
(unsigned long *) undo_bg->bg_bitmap);
}
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
+ if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
+ ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
+ " count %u but claims %u are freed. num_bits %d",
+ (unsigned long long)le64_to_cpu(bg->bg_blkno),
+ le16_to_cpu(bg->bg_bits),
+ le16_to_cpu(bg->bg_free_bits_count), num_bits);
+ return -EROFS;
+ }
if (undo_fn)
jbd_unlock_bh_state(group_bh);
@@ -2281,19 +2464,18 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *group;
- mlog_entry_void();
-
/* The alloc_bh comes from ocfs2_free_dinode() or
* ocfs2_free_clusters(). The callers have all locked the
* allocator and gotten alloc_bh from the lock call. This
- * validates the dinode buffer. Any corruption that has happended
+ * validates the dinode buffer. Any corruption that has happened
* is a code bug. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
- mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
- (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
- (unsigned long long)bg_blkno, start_bit);
+ trace_ocfs2_free_suballoc_bits(
+ (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
+ (unsigned long long)bg_blkno,
+ start_bit, count);
status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
&group_bh);
@@ -2329,7 +2511,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
bail:
brelse(group_bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2374,11 +2557,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
/* You can't ever have a contiguous set of clusters
* bigger than a block group bitmap so we never have to worry
- * about looping on them. */
-
- mlog_entry_void();
-
- /* This is expensive. We can safely remove once this stuff has
+ * about looping on them.
+ * This is expensive. We can safely remove once this stuff has
* gotten tested really well. */
BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
@@ -2387,10 +2567,9 @@ static int _ocfs2_free_clusters(handle_t *handle,
ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
&bg_start_bit);
- mlog(0, "want to free %u clusters starting at block %llu\n",
- num_clusters, (unsigned long long)start_blk);
- mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
- (unsigned long long)bg_blkno, bg_start_bit);
+ trace_ocfs2_free_clusters((unsigned long long)bg_blkno,
+ (unsigned long long)start_blk,
+ bg_start_bit, num_clusters);
status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
bg_start_bit, bg_blkno,
@@ -2404,7 +2583,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
num_clusters);
out:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2567,13 +2747,14 @@ out:
* suballoc_bit.
*/
static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
- u16 *suballoc_slot, u16 *suballoc_bit)
+ u16 *suballoc_slot, u64 *group_blkno,
+ u16 *suballoc_bit)
{
int status;
struct buffer_head *inode_bh = NULL;
struct ocfs2_dinode *inode_fe;
- mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
+ trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno);
/* dirty read disk */
status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
@@ -2604,11 +2785,14 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
if (suballoc_bit)
*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
+ if (group_blkno)
+ *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
bail:
brelse(inode_bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2621,7 +2805,8 @@ bail:
*/
static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
struct inode *suballoc,
- struct buffer_head *alloc_bh, u64 blkno,
+ struct buffer_head *alloc_bh,
+ u64 group_blkno, u64 blkno,
u16 bit, int *res)
{
struct ocfs2_dinode *alloc_di;
@@ -2630,8 +2815,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
u64 bg_blkno;
int status;
- mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
- (unsigned int)bit);
+ trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
+ (unsigned int)bit);
alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
@@ -2642,10 +2827,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
goto bail;
}
- if (alloc_di->i_suballoc_loc)
- bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
- else
- bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
+ bg_blkno = group_blkno ? group_blkno :
+ ocfs2_which_suballoc_group(blkno, bit);
status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
&group_bh);
if (status < 0) {
@@ -2660,7 +2843,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
bail:
brelse(group_bh);
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}
@@ -2680,14 +2864,15 @@ bail:
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
{
int status;
+ u64 group_blkno = 0;
u16 suballoc_bit = 0, suballoc_slot = 0;
struct inode *inode_alloc_inode;
struct buffer_head *alloc_bh = NULL;
- mlog_entry("blkno: %llu", (unsigned long long)blkno);
+ trace_ocfs2_test_inode_bit((unsigned long long)blkno);
status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
- &suballoc_bit);
+ &group_blkno, &suballoc_bit);
if (status < 0) {
mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
goto bail;
@@ -2709,13 +2894,14 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
mutex_unlock(&inode_alloc_inode->i_mutex);
+ iput(inode_alloc_inode);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
goto bail;
}
status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
- blkno, suballoc_bit, res);
+ group_blkno, blkno, suballoc_bit, res);
if (status < 0)
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
@@ -2725,6 +2911,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
iput(inode_alloc_inode);
brelse(alloc_bh);
bail:
- mlog_exit(status);
+ if (status)
+ mlog_errno(status);
return status;
}