From 9c7af40b210e87f8fddd97b0badc0a352862234a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 28 Jul 2008 18:02:53 -0700 Subject: ocfs2: throttle back local alloc when low on disk space Ocfs2's local allocator disables itself for the duration of a mount point when it has trouble allocating a large enough area from the primary bitmap. That can cause performance problems, especially for disks which were only temporarily full or fragmented. This patch allows for the allocator to shrink it's window first, before being disabled. Later, it can also be re-enabled so that any performance drop is minimized. To do this, we allow the value of osb->local_alloc_bits to be shrunk when needed. The default value is recorded in a mostly read-only variable so that we can re-initialize when required. Locking had to be updated so that we could protect changes to local_alloc_bits. Mostly this involves protecting various local alloc values with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which is used when the local allocator is has shrunk, but is not disabled. If the available space dips below 1 megabyte, the local alloc file is disabled. In either case, local alloc is re-enabled 30 seconds after the event, or when an appropriate amount of bits is seen in the primary bitmap. Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d2d278fb981..de7b93d76d1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -111,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, u64 *bg_blkno, u16 *bg_bit_off); -static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) { struct inode *inode = ac->ac_inode; @@ -686,15 +686,6 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, if ((status < 0) && (status != -ENOSPC)) { mlog_errno(status); goto bail; - } else if (status == -ENOSPC) { - /* reserve_local_bits will return enospc with - * the local alloc inode still locked, so we - * can change this safely here. */ - mlog(0, "Disabling local alloc\n"); - /* We set to OCFS2_LA_DISABLED so that umount - * can clean up what's left of the local - * allocation */ - osb->local_alloc_state = OCFS2_LA_DISABLED; } } @@ -1005,6 +996,7 @@ static int ocfs2_cluster_group_search(struct inode *inode, int search = -ENOSPC; int ret; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u16 tmp_off, tmp_found; unsigned int max_bits, gd_cluster_off; @@ -1045,6 +1037,12 @@ static int ocfs2_cluster_group_search(struct inode *inode, *bit_off = tmp_off; *bits_found = tmp_found; search = 0; /* success */ + } else if (tmp_found) { + /* + * Don't show bits which we'll be returning + * for allocation to the local alloc bitmap. + */ + ocfs2_local_alloc_seen_free_bits(osb, tmp_found); } } @@ -1203,9 +1201,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, bit_off, - &tmp_bits)) == -ENOSPC) { + while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, + min_bits, bit_off, &tmp_bits)) == -ENOSPC) { if (!bg->bg_next_group) break; @@ -1838,9 +1835,15 @@ int ocfs2_free_clusters(handle_t *handle, status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, bg_start_bit, bg_blkno, num_clusters); - if (status < 0) + if (status < 0) { mlog_errno(status); + goto out; + } + + ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), + num_clusters); +out: mlog_exit(status); return status; } -- cgit v1.2.3-18-g5258 From 811f933df1e55615fd0bb4818f31e3868a8e6e23 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:43 +0800 Subject: ocfs2: Use ocfs2_extent_list instead of ocfs2_dinode. ocfs2_extend_meta_needed(), ocfs2_calc_extend_credits() and ocfs2_reserve_new_metadata() are all useful for extent tree operations. But they are all limited to an inode btree because they use a struct ocfs2_dinode parameter. Change their parameter to struct ocfs2_extent_list (the part of an ocfs2_dinode they actually use) so that the xattr btree code can use these functions. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index de7b93d76d1..2a817bca1dd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -494,7 +494,7 @@ bail: } int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, - struct ocfs2_dinode *fe, + struct ocfs2_extent_list *root_el, struct ocfs2_alloc_context **ac) { int status; @@ -507,7 +507,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, goto bail; } - (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); + (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(root_el); (*ac)->ac_which = OCFS2_AC_USE_META; slot = osb->slot_num; (*ac)->ac_group_search = ocfs2_block_group_search; -- cgit v1.2.3-18-g5258 From e7d4cb6bc19658646357eeff134645cd9bc3479f Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:44 +0800 Subject: ocfs2: Abstract ocfs2_extent_tree in b-tree operations. In the old extent tree operation, we take the hypothesis that we are using the ocfs2_extent_list in ocfs2_dinode as the tree root. As xattr will also use ocfs2_extent_list to store large value for a xattr entry, we refactor the tree operation so that xattr can use it directly. The refactoring includes 4 steps: 1. Abstract set/get of last_eb_blk and update_clusters since they may be stored in different location for dinode and xattr. 2. Add a new structure named ocfs2_extent_tree to indicate the extent tree the operation will work on. 3. Remove all the use of fe_bh and di, use root_bh and root_el in extent tree instead. So now all the fe_bh is replaced with et->root_bh, el with root_el accordingly. 4. Make ocfs2_lock_allocators generic. Now it is limited to be only used in file extend allocation. But the whole function is useful when we want to store large EAs. Note: This patch doesn't touch ocfs2_commit_truncate() since it is not used for anything other than truncate inode data btrees. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 2a817bca1dd..b642c825fb7 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1894,3 +1894,85 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); } } + +/* + * For a given allocation, determine which allocators will need to be + * accessed, and lock them, reserving the appropriate number of bits. + * + * Sparse file systems call this from ocfs2_write_begin_nolock() + * and ocfs2_allocate_unwritten_extents(). + * + * File systems which don't support holes call this from + * ocfs2_extend_allocation(). + */ +int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, + struct ocfs2_extent_list *root_el, + u32 clusters_to_add, u32 extents_to_split, + struct ocfs2_alloc_context **data_ac, + struct ocfs2_alloc_context **meta_ac) +{ + int ret = 0, num_free_extents; + unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + *meta_ac = NULL; + if (data_ac) + *data_ac = NULL; + + BUG_ON(clusters_to_add != 0 && data_ac == NULL); + + num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh, + OCFS2_DINODE_EXTENT); + if (num_free_extents < 0) { + ret = num_free_extents; + mlog_errno(ret); + goto out; + } + + /* + * Sparse allocation file systems need to be more conservative + * with reserving room for expansion - the actual allocation + * happens while we've got a journal handle open so re-taking + * a cluster lock (because we ran out of room for another + * extent) will violate ordering rules. + * + * Most of the time we'll only be seeing this 1 cluster at a time + * anyway. + * + * Always lock for any unwritten extents - we might want to + * add blocks during a split. + */ + if (!num_free_extents || + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { + ret = ocfs2_reserve_new_metadata(osb, root_el, meta_ac); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + } + + if (clusters_to_add == 0) + goto out; + + ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + +out: + if (ret) { + if (*meta_ac) { + ocfs2_free_alloc_context(*meta_ac); + *meta_ac = NULL; + } + + /* + * We cannot have an error and a non null *data_ac. + */ + } + + return ret; +} -- cgit v1.2.3-18-g5258 From f56654c435c06f2b2bd5751889b1a08a3add7d6c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:48 +0800 Subject: ocfs2: Add extent tree operation for xattr value btrees Add some thin wrappers around ocfs2_insert_extent() for each of the 3 different btree types, ocfs2_inode_insert_extent(), ocfs2_xattr_value_insert_extent() and ocfs2_xattr_tree_insert_extent(). The last is for the xattr index btree, which will be used in a followup patch. All the old callers in file.c etc will call ocfs2_dinode_insert_extent(), while the other two handle the xattr issue. And the init of extent tree are handled by these functions. When storing xattr value which is too large, we will allocate some clusters for it and here ocfs2_extent_list and ocfs2_extent_rec will also be used. In order to re-use the b-tree operation code, a new parameter named "private" is added into ocfs2_extent_tree and it is used to indicate the root of ocfs2_exent_list. The reason is that we can't deduce the root from the buffer_head now. It may be in an inode, an ocfs2_xattr_block or even worse, in any place in an ocfs2_xattr_bucket. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b642c825fb7..bb774d70d26 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1909,7 +1909,8 @@ int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, struct ocfs2_extent_list *root_el, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, - struct ocfs2_alloc_context **meta_ac) + struct ocfs2_alloc_context **meta_ac, + enum ocfs2_extent_tree_type type, void *private) { int ret = 0, num_free_extents; unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; @@ -1922,7 +1923,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, BUG_ON(clusters_to_add != 0 && data_ac == NULL); num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh, - OCFS2_DINODE_EXTENT); + type, private); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); -- cgit v1.2.3-18-g5258 From cf1d6c763fbcb115263114302485ad17e7933d87 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Mon, 18 Aug 2008 17:11:00 +0800 Subject: ocfs2: Add extended attribute support This patch implements storing extended attributes both in inode or a single external block. We only store EA's in-inode when blocksize > 512 or that inode block has free space for it. When an EA's value is larger than 80 bytes, we will store the value via b-tree outside inode or block. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index bb774d70d26..f1871ca8381 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -493,9 +493,9 @@ bail: return status; } -int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, - struct ocfs2_extent_list *root_el, - struct ocfs2_alloc_context **ac) +int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, + int blocks, + struct ocfs2_alloc_context **ac) { int status; u32 slot; @@ -507,7 +507,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, goto bail; } - (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(root_el); + (*ac)->ac_bits_wanted = blocks; (*ac)->ac_which = OCFS2_AC_USE_META; slot = osb->slot_num; (*ac)->ac_group_search = ocfs2_block_group_search; @@ -532,6 +532,15 @@ bail: return status; } +int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, + struct ocfs2_extent_list *root_el, + struct ocfs2_alloc_context **ac) +{ + return ocfs2_reserve_new_metadata_blocks(osb, + ocfs2_extend_meta_needed(root_el), + ac); +} + static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac) { -- cgit v1.2.3-18-g5258 From f99b9b7ccf6a691f653cec45f36bfdd1e94769c7 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 20 Aug 2008 19:36:33 -0700 Subject: ocfs2: Make ocfs2_extent_tree the first-class representation of a tree. We now have three different kinds of extent trees in ocfs2: inode data (dinode), extended attributes (xattr_tree), and extended attribute values (xattr_value). There is a nice abstraction for them, ocfs2_extent_tree, but it is hidden in alloc.c. All the calling functions have to pick amongst a varied API and pass in type bits and often extraneous pointers. A better way is to make ocfs2_extent_tree a first-class object. Everyone converts their object to an ocfs2_extent_tree() via the ocfs2_get_*_extent_tree() calls, then uses the ocfs2_extent_tree for all tree calls to alloc.c. This simplifies a lot of callers, making for readability. It also provides an easy way to add additional extent tree types, as they only need to be defined in alloc.c with a ocfs2_get__extent_tree() function. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f1871ca8381..8d3947e94a2 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1914,12 +1914,11 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) * File systems which don't support holes call this from * ocfs2_extend_allocation(). */ -int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, - struct ocfs2_extent_list *root_el, +int ocfs2_lock_allocators(struct inode *inode, + struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, - struct ocfs2_alloc_context **meta_ac, - enum ocfs2_extent_tree_type type, void *private) + struct ocfs2_alloc_context **meta_ac) { int ret = 0, num_free_extents; unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; @@ -1931,8 +1930,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, BUG_ON(clusters_to_add != 0 && data_ac == NULL); - num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh, - type, private); + num_free_extents = ocfs2_num_free_extents(osb, inode, et); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); @@ -1954,7 +1952,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh, */ if (!num_free_extents || (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { - ret = ocfs2_reserve_new_metadata(osb, root_el, meta_ac); + ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); -- cgit v1.2.3-18-g5258 From 1187c968852e3c668f3b9376083851f81f6eee22 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 3 Sep 2008 20:03:39 -0700 Subject: ocfs2: Limit inode allocation to 32bits. ocfs2 inode numbers are block numbers. For any filesystem with less than 2^32 blocks, this is not a problem. However, when ocfs2 starts using JDB2, it will be able to support filesystems with more than 2^32 blocks. This would result in inode numbers higher than 2^32. The problem is that stat(2) can't handle those numbers on 32bit machines. The simple solution is to have ocfs2 allocate all inodes below that boundary. The suballoc code is changed to honor an optional block limit. Only the inode suballocator sets that limit - all other allocations stay unlimited. The biggest trick is to grow the inode suballocator beneath that limit. There's no point in allocating block groups that are above the limit, then rejecting their elements later on. We want to prevent the inode allocator from ever having block groups above the limit. This involves a little gyration with the local alloc code. If the local alloc window is above the limit, it signals the caller to try the global bitmap but does not disable the local alloc file (which can be used for other allocations). [ Minor cleanup - removed an ML_NOTICE comment. --Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 83 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 15 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8d3947e94a2..213bdca16fe 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle, struct ocfs2_chain_list *cl); static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct inode *alloc_inode, - struct buffer_head *bh); + struct buffer_head *bh, + u64 max_block); static int ocfs2_cluster_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, + u64 max_block, u16 *bit_off, u16 *bits_found); static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, + u64 max_block, u16 *bit_off, u16 *bits_found); static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, @@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, u64 data_blkno, u64 *bg_blkno, u16 *bg_bit_off); +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, + u32 bits_wanted, u64 max_block, + struct ocfs2_alloc_context **ac); void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) { @@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) */ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct inode *alloc_inode, - struct buffer_head *bh) + struct buffer_head *bh, + u64 max_block) { int status, credits; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; @@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, mlog_entry_void(); cl = &fe->id2.i_chain; - status = ocfs2_reserve_clusters(osb, - le16_to_cpu(cl->cl_cpg), - &ac); + status = ocfs2_reserve_clusters_with_limit(osb, + le16_to_cpu(cl->cl_cpg), + max_block, &ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, goto bail; } - status = ocfs2_block_group_alloc(osb, alloc_inode, bh); + status = ocfs2_block_group_alloc(osb, alloc_inode, bh, + ac->ac_max_block); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -590,6 +598,13 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, (*ac)->ac_group_search = ocfs2_block_group_search; + /* + * stat(2) can't handle i_ino > 32bits, so we tell the + * lower levels not to allocate us a block group past that + * limit. + */ + (*ac)->ac_max_block = (u32)~0U; + /* * slot is set when we successfully steal inode from other nodes. * It is reset in 3 places: @@ -670,9 +685,9 @@ bail: /* Callers don't need to care which bitmap (local alloc or main) to * use so we figure it out for them, but unfortunately this clutters * things a bit. */ -int ocfs2_reserve_clusters(struct ocfs2_super *osb, - u32 bits_wanted, - struct ocfs2_alloc_context **ac) +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, + u32 bits_wanted, u64 max_block, + struct ocfs2_alloc_context **ac) { int status; @@ -686,13 +701,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, } (*ac)->ac_bits_wanted = bits_wanted; + (*ac)->ac_max_block = max_block; status = -ENOSPC; if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { status = ocfs2_reserve_local_alloc_bits(osb, bits_wanted, *ac); - if ((status < 0) && (status != -ENOSPC)) { + if (status == -EFBIG) { + /* The local alloc window is outside ac_max_block. + * use the main bitmap. */ + status = -ENOSPC; + } else if ((status < 0) && (status != -ENOSPC)) { mlog_errno(status); goto bail; } @@ -718,6 +738,13 @@ bail: return status; } +int ocfs2_reserve_clusters(struct ocfs2_super *osb, + u32 bits_wanted, + struct ocfs2_alloc_context **ac) +{ + return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); +} + /* * More or less lifted from ext3. I'll leave their description below: * @@ -1000,10 +1027,12 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg static int ocfs2_cluster_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, + u64 max_block, u16 *bit_off, u16 *bits_found) { int search = -ENOSPC; int ret; + u64 blkoff; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u16 tmp_off, tmp_found; @@ -1038,6 +1067,17 @@ static int ocfs2_cluster_group_search(struct inode *inode, if (ret) return ret; + if (max_block) { + blkoff = ocfs2_clusters_to_blocks(inode->i_sb, + gd_cluster_off + + tmp_off + tmp_found); + mlog(0, "Checking %llu against %llu\n", + (unsigned long long)blkoff, + (unsigned long long)max_block); + if (blkoff > max_block) + return -ENOSPC; + } + /* ocfs2_block_group_find_clear_bits() might * return success, but we still want to return * -ENOSPC unless it found the minimum number @@ -1061,19 +1101,31 @@ static int ocfs2_cluster_group_search(struct inode *inode, static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, + u64 max_block, u16 *bit_off, u16 *bits_found) { int ret = -ENOSPC; + u64 blkoff; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; BUG_ON(min_bits != 1); BUG_ON(ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) + if (bg->bg_free_bits_count) { ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, le16_to_cpu(bg->bg_bits), bit_off, bits_found); + if (!ret && max_block) { + blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + + *bits_found; + mlog(0, "Checking %llu against %llu\n", + (unsigned long long)blkoff, + (unsigned long long)max_block); + if (blkoff > max_block) + ret = -ENOSPC; + } + } return ret; } @@ -1138,7 +1190,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, } ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, - bit_off, &found); + ac->ac_max_block, bit_off, &found); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); @@ -1210,11 +1262,12 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, - min_bits, bit_off, &tmp_bits)) == -ENOSPC) { + while ((status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, bit_off, + &tmp_bits)) == -ENOSPC) { if (!bg->bg_next_group) break; - if (prev_group_bh) { brelse(prev_group_bh); prev_group_bh = NULL; -- cgit v1.2.3-18-g5258 From 12462f1d9f0b96389497438dc2730c6f7410be82 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 3 Sep 2008 20:03:40 -0700 Subject: ocfs2: Add the 'inode64' mount option. Now that ocfs2 limits inode numbers to 32bits, add a mount option to disable the limit. This parallels XFS. 64bit systems can handle the larger inode numbers. [ Added description of inode64 mount option in ocfs2.txt. --Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 213bdca16fe..d7a6f928c31 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -601,9 +601,10 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, /* * stat(2) can't handle i_ino > 32bits, so we tell the * lower levels not to allocate us a block group past that - * limit. + * limit. The 'inode64' mount option avoids this behavior. */ - (*ac)->ac_max_block = (u32)~0U; + if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64)) + (*ac)->ac_max_block = (u32)~0U; /* * slot is set when we successfully steal inode from other nodes. -- cgit v1.2.3-18-g5258 From a81cb88b64a479b78c6dd5666678d50171865db8 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 7 Oct 2008 14:25:16 -0700 Subject: ocfs2: Don't check for NULL before brelse() This is pointless as brelse() already does the check. Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d7a6f928c31..08d8844a3c2 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -130,10 +130,8 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) iput(inode); ac->ac_inode = NULL; } - if (ac->ac_bh) { - brelse(ac->ac_bh); - ac->ac_bh = NULL; - } + brelse(ac->ac_bh); + ac->ac_bh = NULL; } void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) @@ -401,8 +399,7 @@ bail: if (ac) ocfs2_free_alloc_context(ac); - if (bg_bh) - brelse(bg_bh); + brelse(bg_bh); mlog_exit(status); return status; @@ -494,8 +491,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, get_bh(bh); ac->ac_bh = bh; bail: - if (bh) - brelse(bh); + brelse(bh); mlog_exit(status); return status; @@ -1269,10 +1265,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, &tmp_bits)) == -ENOSPC) { if (!bg->bg_next_group) break; - if (prev_group_bh) { - brelse(prev_group_bh); - prev_group_bh = NULL; - } + + brelse(prev_group_bh); + prev_group_bh = NULL; + next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; @@ -1367,10 +1363,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, *bg_blkno = le64_to_cpu(bg->bg_blkno); *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: - if (group_bh) - brelse(group_bh); - if (prev_group_bh) - brelse(prev_group_bh); + brelse(group_bh); + brelse(prev_group_bh); mlog_exit(status); return status; @@ -1844,8 +1838,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, } bail: - if (group_bh) - brelse(group_bh); + brelse(group_bh); mlog_exit(status); return status; -- cgit v1.2.3-18-g5258 From 31d33073ca38603dea705dae45e094a64ca062d6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 9 Oct 2008 17:20:30 -0700 Subject: ocfs2: Require an inode for ocfs2_read_block(s)(). Now that synchronous readers are using ocfs2_read_blocks_sync(), all callers of ocfs2_read_blocks() are passing an inode. Use it unconditionally. Since it's there, we don't need to pass the ocfs2_super either. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 08d8844a3c2..f0056b7d435 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1172,8 +1172,8 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, struct ocfs2_group_desc *gd; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, - &group_bh, OCFS2_BH_CACHED, alloc_inode); + ret = ocfs2_read_block(alloc_inode, gd_blkno, + &group_bh, OCFS2_BH_CACHED); if (ret < 0) { mlog_errno(ret); return ret; @@ -1242,9 +1242,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); - status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), + status = ocfs2_read_block(alloc_inode, le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh, OCFS2_BH_CACHED, alloc_inode); + &group_bh, OCFS2_BH_CACHED); if (status < 0) { mlog_errno(status); goto bail; @@ -1272,9 +1272,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; - status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), + status = ocfs2_read_block(alloc_inode, next_group, &group_bh, - OCFS2_BH_CACHED, alloc_inode); + OCFS2_BH_CACHED); if (status < 0) { mlog_errno(status); goto bail; @@ -1777,7 +1777,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle, { int status = 0; u32 tmp_used; - struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct buffer_head *group_bh = NULL; @@ -1796,8 +1795,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, - alloc_inode); + status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh, + OCFS2_BH_CACHED); if (status < 0) { mlog_errno(status); goto bail; -- cgit v1.2.3-18-g5258 From 0fcaa56a2a020dd6f90c202b7084e6f4cbedb6c2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 9 Oct 2008 17:20:31 -0700 Subject: ocfs2: Simplify ocfs2_read_block() More than 30 callers of ocfs2_read_block() pass exactly OCFS2_BH_CACHED. Only six pass a different flag set. Rather than have every caller care, let's make ocfs2_read_block() take no flags and always do a cached read. The remaining six places can call ocfs2_read_blocks() directly. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f0056b7d435..c5ff18b46b5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1172,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, struct ocfs2_group_desc *gd; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(alloc_inode, gd_blkno, - &group_bh, OCFS2_BH_CACHED); + ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); if (ret < 0) { mlog_errno(ret); return ret; @@ -1244,7 +1243,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = ocfs2_read_block(alloc_inode, le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh, OCFS2_BH_CACHED); + &group_bh); if (status < 0) { mlog_errno(status); goto bail; @@ -1273,8 +1272,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, prev_group_bh = group_bh; group_bh = NULL; status = ocfs2_read_block(alloc_inode, - next_group, &group_bh, - OCFS2_BH_CACHED); + next_group, &group_bh); if (status < 0) { mlog_errno(status); goto bail; @@ -1795,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh, - OCFS2_BH_CACHED); + status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); if (status < 0) { mlog_errno(status); goto bail; -- cgit v1.2.3-18-g5258