aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/balloc.c
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2013-04-09 22:11:22 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-04-09 22:11:22 -0400
commit27dd43854227bb0e6ab70129bd21b60d396db2e7 (patch)
tree14e490b9d0ac63583849c4cad4b3ad0123902c3a /fs/ext4/balloc.c
parentf45a5ef91bef7e02149a216ed6dc3fcdd8b38268 (diff)
ext4: introduce reserved space
Currently in ENOSPC condition when writing into unwritten space, or punching a hole, we might need to split the extent and grow extent tree. However since we can not allocate any new metadata blocks we'll have to zero out unwritten part of extent or punched out part of extent, or in the worst case return ENOSPC even though use actually does not allocate any space. Also in delalloc path we do reserve metadata and data blocks for the time we're going to write out, however metadata block reservation is very tricky especially since we expect that logical connectivity implies physical connectivity, however that might not be the case and hence we might end up allocating more metadata blocks than previously reserved. So in future, metadata reservation checks should be removed since we can not assure that we do not under reserve. And this is where reserved space comes into the picture. When mounting the file system we slice off a little bit of the file system space (2% or 4096 clusters, whichever is smaller) which can be then used for the cases mentioned above to prevent costly zeroout, or unexpected ENOSPC. The number of reserved clusters can be set via sysfs, however it can never be bigger than number of free clusters in the file system. Note that this patch fixes the failure of xfstest 274 as expected. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Diffstat (limited to 'fs/ext4/balloc.c')
-rw-r--r--fs/ext4/balloc.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9e8d8ffb063..8dcaea69e37 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,20 +499,22 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags)
{
- s64 free_clusters, dirty_clusters, root_clusters;
+ s64 free_clusters, dirty_clusters, rsv, resv_clusters;
struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
free_clusters = percpu_counter_read_positive(fcc);
dirty_clusters = percpu_counter_read_positive(dcc);
+ resv_clusters = atomic64_read(&sbi->s_resv_clusters);
/*
* r_blocks_count should always be multiple of the cluster ratio so
* we are safe to do a plane bit shift only.
*/
- root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
+ resv_clusters;
- if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+ if (free_clusters - (nclusters + rsv + dirty_clusters) <
EXT4_FREECLUSTERS_WATERMARK) {
free_clusters = percpu_counter_sum_positive(fcc);
dirty_clusters = percpu_counter_sum_positive(dcc);
@@ -520,15 +522,21 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
/* Check whether we have space after accounting for current
* dirty clusters & root reserved clusters.
*/
- if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
+ if (free_clusters >= (rsv + nclusters + dirty_clusters))
return 1;
/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE) ||
- (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+ (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+ if (free_clusters >= (nclusters + dirty_clusters +
+ resv_clusters))
+ return 1;
+ }
+ /* No free blocks. Let's see if we can dip into reserved pool */
+ if (flags & EXT4_MB_USE_RESERVED) {
if (free_clusters >= (nclusters + dirty_clusters))
return 1;
}