diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-18 12:24:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-18 12:24:05 -0700 |
commit | adff377bb1010ec65aada1f94ef2be7c7805c711 (patch) | |
tree | 83742ebd4de5c043b2f58969ef4d6634a301bfe7 /fs | |
parent | d8bdc59f215e62098bc5b4256fd9928bf27053a1 (diff) | |
parent | f65647c29b14f5a32ff6f3237b0ef3b375ed5a79 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (24 commits)
Btrfs: fix free space cache leak
Btrfs: avoid taking the chunk_mutex in do_chunk_alloc
Btrfs end_bio_extent_readpage should look for locked bits
Btrfs: don't force chunk allocation in find_free_extent
Btrfs: Check validity before setting an acl
Btrfs: Fix incorrect inode nlink in btrfs_link()
Btrfs: Check if btrfs_next_leaf() returns error in btrfs_real_readdir()
Btrfs: Check if btrfs_next_leaf() returns error in btrfs_listxattr()
Btrfs: make uncache_state unconditional
btrfs: using cached extent_state in set/unlock combinations
Btrfs: avoid taking the trans_mutex in btrfs_end_transaction
Btrfs: fix subvolume mount by name problem when default mount subvolume is set
fix user annotation in ioctl.c
Btrfs: check for duplicate iov_base's when doing dio reads
btrfs: properly handle overlapping areas in memmove_extent_buffer
Btrfs: fix memory leaks in btrfs_new_inode()
Btrfs: check for duplicate iov_base's when doing dio reads
Btrfs: reuse the extent_map we found when calling btrfs_get_extent
Btrfs: do not use async submit for small DIO io's
Btrfs: don't split dio bios if we don't have to
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/acl.c | 9 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 9 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 125 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 82 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 2 | ||||
-rw-r--r-- | fs/btrfs/file.c | 21 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 119 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 165 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 42 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 48 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 4 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 33 |
14 files changed, 430 insertions, 233 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index de34bfad9ec..5d505aaa72f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, if (value) { acl = posix_acl_from_xattr(value, size); - if (acl == NULL) { - value = NULL; - size = 0; + if (acl) { + ret = posix_acl_valid(acl); + if (ret) + goto out; } else if (IS_ERR(acl)) { return PTR_ERR(acl); } } ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); - +out: posix_acl_release(acl); return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3458b572554..2e61fe1b6b8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -740,8 +740,10 @@ struct btrfs_space_info { */ unsigned long reservation_progress; - int full; /* indicates that we cannot allocate any more + int full:1; /* indicates that we cannot allocate any more chunks for this space */ + int chunk_alloc:1; /* set if we are allocating a chunk */ + int force_alloc; /* set if we need to force a chunk alloc for this space */ @@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); +void btrfs_drop_pages(struct page **pages, size_t num_pages); +int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, + struct page **pages, size_t num_pages, + loff_t pos, size_t write_bytes, + struct extent_state **cached); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8f1d44ba332..68c84c8c24b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); - t->use_count = 0; + atomic_set(&t->use_count, 0); list_del_init(&t->list); memset(t, 0, sizeof(*t)); kmem_cache_free(btrfs_transaction_cachep, t); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f619c3cb13b..31f33ba56fe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,25 @@ #include "locking.h" #include "free-space-cache.h" +/* control flags for do_chunk_alloc's force field + * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk + * if we really need one. + * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_LIMITED means to only try and allocate one + * if we have very few chunks already allocated. This is + * used as part of the clustering code to help make sure + * we have a good pool of storage to cluster in, without + * filling the FS with empty chunks + * + */ +enum { + CHUNK_ALLOC_NO_FORCE = 0, + CHUNK_ALLOC_FORCE = 1, + CHUNK_ALLOC_LIMITED = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_readonly = 0; found->bytes_may_use = 0; found->full = 0; - found->force_alloc = 0; + found->force_alloc = CHUNK_ALLOC_NO_FORCE; + found->chunk_alloc = 0; *space_info = found; list_add_rcu(&found->list, &info->space_info); atomic_set(&found->caching_threads, 0); @@ -3150,7 +3170,7 @@ again: if (!data_sinfo->full && alloc_chunk) { u64 alloc_target; - data_sinfo->force_alloc = 1; + data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); @@ -3160,7 +3180,8 @@ alloc: ret = do_chunk_alloc(trans, root->fs_info->extent_root, bytes + 2 * 1024 * 1024, - alloc_target, 0); + alloc_target, + CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); if (ret < 0) { if (ret != -ENOSPC) @@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_METADATA) - found->force_alloc = 1; + found->force_alloc = CHUNK_ALLOC_FORCE; } rcu_read_unlock(); } static int should_alloc_chunk(struct btrfs_root *root, - struct btrfs_space_info *sinfo, u64 alloc_bytes) + struct btrfs_space_info *sinfo, u64 alloc_bytes, + int force) { u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; + u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes + 256 * 1024 * 1024 < num_bytes) + if (force == CHUNK_ALLOC_FORCE) + return 1; + + /* + * in limited mode, we want to have some free space up to + * about 1% of the FS size. + */ + if (force == CHUNK_ALLOC_LIMITED) { + thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = max_t(u64, 64 * 1024 * 1024, + div_factor_fine(thresh, 1)); + + if (num_bytes - num_allocated < thresh) + return 1; + } + + /* + * we have two similar checks here, one based on percentage + * and once based on a hard number of 256MB. The idea + * is that if we have a good amount of free + * room, don't allocate a chunk. A good mount is + * less than 80% utilized of the chunks we have allocated, + * or more than 256MB free + */ + if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) return 0; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes < div_factor(num_bytes, 8)) + if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) return 0; thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + + /* 256MB or 5% of the FS */ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) return 0; - return 1; } @@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, { struct btrfs_space_info *space_info; struct btrfs_fs_info *fs_info = extent_root->fs_info; + int wait_for_alloc = 0; int ret = 0; - mutex_lock(&fs_info->chunk_mutex); - flags = btrfs_reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); @@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); +again: spin_lock(&space_info->lock); if (space_info->force_alloc) - force = 1; + force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); - goto out; + return 0; } - if (!force && !should_alloc_chunk(extent_root, space_info, - alloc_bytes)) { + if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { spin_unlock(&space_info->lock); - goto out; + return 0; + } else if (space_info->chunk_alloc) { + wait_for_alloc = 1; + } else { + space_info->chunk_alloc = 1; } + spin_unlock(&space_info->lock); + mutex_lock(&fs_info->chunk_mutex); + + /* + * The chunk_mutex is held throughout the entirety of a chunk + * allocation, so once we've acquired the chunk_mutex we know that the + * other guy is done and we need to recheck and see if we should + * allocate. + */ + if (wait_for_alloc) { + mutex_unlock(&fs_info->chunk_mutex); + wait_for_alloc = 0; + goto again; + } + /* * If we have mixed data/metadata chunks we want to make sure we keep * allocating mixed chunks instead of individual chunks. @@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->full = 1; else ret = 1; - space_info->force_alloc = 0; + + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; + space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); -out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -5303,11 +5368,13 @@ loop: if (allowed_chunk_alloc) { ret = do_chunk_alloc(trans, root, num_bytes + - 2 * 1024 * 1024, data, 1); + 2 * 1024 * 1024, data, + CHUNK_ALLOC_LIMITED); allowed_chunk_alloc = 0; done_chunk_alloc = 1; - } else if (!done_chunk_alloc) { - space_info->force_alloc = 1; + } else if (!done_chunk_alloc && + space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { + space_info->force_alloc = CHUNK_ALLOC_LIMITED; } if (loop < LOOP_NO_EMPTY_SIZE) { @@ -5393,7 +5460,8 @@ again: */ if (empty_size || root->ref_cows) ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data, 0); + num_bytes + 2 * 1024 * 1024, data, + CHUNK_ALLOC_NO_FORCE); WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -5405,7 +5473,7 @@ again: num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, 1); + num_bytes, data, CHUNK_ALLOC_FORCE); goto again; } if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { @@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, alloc_flags = update_block_group_flags(root, cache->flags); if (alloc_flags != cache->flags) - do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); ret = set_block_group_ro(cache); if (!ret) goto out; alloc_flags = get_alloc_profile(root, cache->space_info->flags); - ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); if (ret < 0) goto out; ret = set_block_group_ro(cache); @@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type) { u64 alloc_flags = get_alloc_profile(root, type); - return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 20ddb28602a..31513860508 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state, } } +static void uncache_state(struct extent_state **cached_ptr) +{ + if (cached_ptr && (*cached_ptr)) { + struct extent_state *state = *cached_ptr; + *cached_ptr = NULL; + free_extent_state(state); + } +} + /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, } int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) + struct extent_state **cached_state, gfp_t mask) { - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - NULL, mask); + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, + NULL, cached_state, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, mask); } -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, mask); @@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; + struct extent_state *state; + tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); + if (state && state->start == start) { + /* + * take a reference on the state, unlock will drop + * the ref + */ + cache_state(state, &cached); + } + spin_unlock(&tree->lock); + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - NULL); + state); if (ret) uptodate = 0; } @@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + uncache_state(&cached); continue; } } if (uptodate) { - set_extent_uptodate(tree, start, end, + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); if (whole_page) { if (uptodate) { @@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); + set_extent_uptodate(tree, start, end, &cached, + GFP_ATOMIC); } else { ClearPageUptodate(page); SetPageError(page); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); } while (bvec >= bio->bi_io_vec); @@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, while (cur <= end) { if (cur >= last_byte) { char *userpage; + struct extent_state *cached = NULL; + iosize = PAGE_CACHE_SIZE - page_offset; userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); break; } em = get_extent(inode, page, page_offset, cur, @@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { char *userpage; + struct extent_state *cached = NULL; + userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); cur = cur + iosize; page_offset += iosize; continue; @@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree, iocount++; block_start = block_start + iosize; } else { - set_extent_uptodate(tree, block_start, cur_end, + struct extent_state *cached = NULL; + + set_extent_uptodate(tree, block_start, cur_end, &cached, GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); + unlock_extent_cached(tree, block_start, cur_end, + &cached, GFP_NOFS); block_start = cur_end + 1; } page_offset = block_start & (PAGE_CACHE_SIZE - 1); @@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); + NULL, GFP_NOFS); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || @@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, kunmap_atomic(dst_kaddr, KM_USER0); } +static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) +{ + unsigned long distance = (src > dst) ? src - dst : dst - src; + return distance < len; +} + static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) @@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *src_kaddr; - if (dst_page != src_page) + if (dst_page != src_page) { src_kaddr = kmap_atomic(src_page, KM_USER1); - else + } else { src_kaddr = dst_kaddr; + BUG_ON(areas_overlap(src_off, dst_off, len)); + } memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); kunmap_atomic(dst_kaddr, KM_USER0); @@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - if (dst_offset < src_offset) { + if (!areas_overlap(src_offset, dst_offset, len)) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f62c5442835..af2d7179c37 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int exclusive_bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); + struct extent_state **cached_state, gfp_t mask); int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e621ea54a3f..75899a01dde 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) +void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. */ -static noinline int dirty_and_release_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - size_t write_bytes) +int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, + struct page **pages, size_t num_pages, + loff_t pos, size_t write_bytes, + struct extent_state **cached) { int err = 0; int i; - struct inode *inode = fdentry(file)->d_inode; u64 num_bytes; u64 start_pos; u64 end_of_last_block; @@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root, end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, - NULL); + cached); if (err) return err; @@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, } if (copied > 0) { - ret = dirty_and_release_pages(root, file, pages, - dirty_pages, pos, - copied); + ret = btrfs_dirty_pages(root, inode, pages, + dirty_pages, pos, copied, + NULL); if (ret) { btrfs_delalloc_release_space(inode, dirty_pages << PAGE_CACHE_SHIFT); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f561c953205..11d2e9cea09 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode; struct rb_node *node; struct list_head *pos, *n; + struct page **pages; struct page *page; struct extent_state *cached_state = NULL; struct btrfs_free_cluster *cluster = NULL; @@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root, u64 start, end, len; u64 bytes = 0; u32 *crc, *checksums; - pgoff_t index = 0, last_index = 0; unsigned long first_page_offset; - int num_checksums; + int index = 0, num_pages = 0; int entries = 0; int bitmaps = 0; int ret = 0; bool next_page = false; + bool out_of_space = false; root = root->fs_info->tree_root; @@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root, return 0; } - last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & ~(root->sectorsize - 1), (u64)-1); /* We need a checksum per page. */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); + crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); if (!crc) { iput(inode); return 0; } + pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); + if (!pages) { + kfree(crc); + iput(inode); + return 0; + } + /* Since the first page has all of our checksums and our generation we * need to calculate the offset into the page that we can start writing * our entries. */ - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); + first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); /* Get the cluster for this block_group if it exists */ if (!list_empty(&block_group->cluster_list)) @@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, * after find_get_page at this point. Just putting this here so people * know and don't freak out. */ - while (index <= last_index) { + while (index < num_pages) { page = grab_cache_page(inode->i_mapping, index); if (!page) { - pgoff_t i = 0; + int i; - while (i < index) { - page = find_get_page(inode->i_mapping, i); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); - i++; + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); } goto out_free; } + pages[index] = page; index++; } @@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, offset = start_offset; } - page = find_get_page(inode->i_mapping, index); + if (index >= num_pages) { + out_of_space = true; + break; + } + + page = pages[index]; addr = kmap(page); entry = addr + start_offset; @@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, bytes += PAGE_CACHE_SIZE; - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - - /* - * We need to release our reference we got for grab_cache_page, - * except for the first page which will hold our checksums, we - * do that below. - */ - if (index != 0) { - unlock_page(page); - page_cache_release(page); - } - - page_cache_release(page); - index++; } while (node || next_page); @@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct btrfs_free_space *entry = list_entry(pos, struct btrfs_free_space, list); - page = find_get_page(inode->i_mapping, index); + if (index >= num_pages) { + out_of_space = true; + break; + } + page = pages[index]; addr = kmap(page); memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); @@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, crc++; bytes += PAGE_CACHE_SIZE; - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); list_del_init(&entry->list); index++; } + if (out_of_space) { + btrfs_drop_pages(pages, num_pages); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, + GFP_NOFS); + ret = 0; + goto out_free; + } + /* Zero out the rest of the pages just to make sure */ - while (index <= last_index) { + while (index < num_pages) { void *addr; - page = find_get_page(inode->i_mapping, index); - + page = pages[index]; addr = kmap(page); memset(addr, 0, PAGE_CACHE_SIZE); kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); bytes += PAGE_CACHE_SIZE; index++; } - btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); - /* Write the checksums and trans id to the first page */ { void *addr; u64 *gen; - page = find_get_page(inode->i_mapping, 0); + page = pages[0]; addr = kmap(page); - memcpy(addr, checksums, sizeof(u32) * num_checksums); - gen = addr + (sizeof(u32) * num_checksums); + memcpy(addr, checksums, sizeof(u32) * num_pages); + gen = addr + (sizeof(u32) * num_pages); *gen = trans->transid; kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); } - BTRFS_I(inode)->generation = trans->transid; + ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, + bytes, &cached_state); + btrfs_drop_pages(pages, num_pages); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state, GFP_NOFS); + if (ret) { + ret = 0; + goto out_free; + } + + BTRFS_I(inode)->generation = trans->transid; + filemap_write_and_wait(inode->i_mapping); key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -853,6 +845,7 @@ out_free: BTRFS_I(inode)->generation = 0; } kfree(checksums); + kfree(pages); btrfs_update_inode(trans, root, inode); iput(inode); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5cc64ab9c48..fcd66b6a808 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1770,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - btrfs_ordered_update_i_size(inode, 0, ordered_extent); - ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + } + ret = 0; out: if (nolock) { if (trans) @@ -2590,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { + if (!leaf->map_token) + map_private_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_inode_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); @@ -2618,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } } /* @@ -4207,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_key found_key; struct btrfs_path *path; int ret; - u32 nritems; struct extent_buffer *leaf; int slot; - int advance; unsigned char d_type; int over = 0; u32 di_cur; @@ -4253,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - advance = 0; while (1) { leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; - if (advance || slot >= nritems) { - if (slot >= nritems - 1) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - slot = path->slots[0]; - } else { - slot++; - path->slots[0]++; - } + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; + else if (ret > 0) + break; + continue; } - advance = 1; item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot);< |