Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c | 664
1 file changed, 383 insertions(+), 281 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe443fece85..09582b81640 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void) state = list_entry(states.next, struct extent_state, leak_list); printk(KERN_ERR "btrfs state leak: start %llu end %llu " "state %lu in tree %p refs %d\n", - (unsigned long long)state->start, - (unsigned long long)state->end, - state->state, state->tree, atomic_read(&state->refs)); + state->start, state->end, state->state, state->tree, + atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } @@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); printk(KERN_ERR "btrfs buffer leak start %llu len %lu " - "refs %d\n", (unsigned long long)eb->start, - eb->len, atomic_read(&eb->refs)); + "refs %d\n", + eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", - caller, - (unsigned long long)btrfs_ino(inode), - (unsigned long long)isize, - (unsigned long long)start, - (unsigned long long)end); + caller, btrfs_ino(inode), isize, start, end); } } #else @@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree, if (end < start) WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", - (unsigned long long)end, - (unsigned long long)start); + end, start); state->start = start; state->end = end; @@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree, struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); printk(KERN_ERR "btrfs found node %llu %llu on insert of " - "%llu %llu\n", (unsigned long long)found->start, - (unsigned long long)found->end, - (unsigned long long)start, (unsigned long long)end); + "%llu %llu\n", + found->start, found->end, start, end); return -EEXIST; } state->tree = tree; @@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. 
@@ -1687,31 +1671,21 @@ out_failed: return found; } -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op) +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long clear_bits, + unsigned long page_ops) { + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; int ret; struct page *pages[16]; unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; - unsigned long clear_bits = 0; - - if (op & EXTENT_CLEAR_UNLOCK) - clear_bits |= EXTENT_LOCKED; - if (op & EXTENT_CLEAR_DIRTY) - clear_bits |= EXTENT_DIRTY; - - if (op & EXTENT_CLEAR_DELALLOC) - clear_bits |= EXTENT_DELALLOC; clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | - EXTENT_SET_PRIVATE2))) + if (page_ops == 0) return 0; while (nr_pages > 0) { @@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { - if (op & EXTENT_SET_PRIVATE2) + if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; } - if (op & EXTENT_CLEAR_DIRTY) + if (page_ops & PAGE_CLEAR_DIRTY) clear_page_dirty_for_io(pages[i]); - if (op & EXTENT_SET_WRITEBACK) + if (page_ops & PAGE_SET_WRITEBACK) set_page_writeback(pages[i]); - if (op & EXTENT_END_WRITEBACK) + if (page_ops & PAGE_END_WRITEBACK) end_page_writeback(pages[i]); - if (op & EXTENT_CLEAR_UNLOCK_PAGE) + if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); page_cache_release(pages[i]); } @@ -1810,7 +1784,7 @@ out: * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. */ -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; struct extent_state *state; @@ -1837,64 +1811,6 @@ out: return ret; } -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count) -{ - struct rb_node *node; - struct extent_state *state; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - - while (count) { - state->private = *csums++; - count--; - state = next_state(state); - } - spin_unlock(&tree->lock); -} - -static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index) -{ - struct bio_vec *bvec = bio->bi_io_vec + bio_index; - - return page_offset(bvec->bv_page) + bvec->bv_offset; -} - -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index, - u32 csums[], int count) -{ - struct rb_node *node; - struct extent_state *state = NULL; - u64 start; - - spin_lock(&tree->lock); - do { - start = __btrfs_get_bio_offset(bio, bio_index); - if (state == NULL || state->start != start) { - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - } - state->private = *csums++; - count--; - bio_index++; - - state = next_state(state); - } while (count); - spin_unlock(&tree->lock); -} - int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { struct rb_node *node; @@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) EXTENT_LOCKED); spin_unlock(&BTRFS_I(inode)->io_tree.lock); - if (state && state->start == failrec->start) { + if (state && state->start <= failrec->start && + state->end >= failrec->start + failrec->len - 1) { fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); @@ -2201,9 +2118,9 @@ out: * needed */ -static int bio_readpage_error(struct bio *failed_bio, struct page *page, - u64 start, u64 end, int failed_mirror, - struct extent_state *state) +static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int failed_mirror) { struct io_failure_record *failrec = NULL; u64 private; @@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct bio *bio; + struct btrfs_io_bio *btrfs_failed_bio; + struct btrfs_io_bio *btrfs_bio; int num_copies; int ret; int read_mode; @@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("bio_readpage_error: cannot repair, num_copies == 1. 
" - "state=%p, num_copies=%d, next_mirror %d, " - "failed_mirror %d\n", state, num_copies, - failrec->this_mirror, failed_mirror); + pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; } - if (!state) { - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&tree->lock); - } - /* * there are two premises: * a) deliver good data to the caller @@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, read_mode = READ_SYNC; } - if (!state || failrec->this_mirror > num_copies) { - pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " - "next_mirror %d, failed_mirror %d\n", state, + if (failrec->this_mirror > num_copies) { + pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; @@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, free_io_failure(inode, failrec, 0); return -EIO; } - bio->bi_private = state; bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_size = 0; + btrfs_failed_bio = btrfs_io_bio(failed_bio); + if (btrfs_failed_bio->csum) { + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = btrfs_bio->csum_inline; + phy_offset >>= inode->i_sb->s_blocksize_bits; + phy_offset *= csum_size; + memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, + csum_size); + } + bio_add_page(bio, page, failrec->len, start - page_offset(page)); pr_debug("bio_readpage_error: submitting new read[%#x] to " @@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err) bio_put(bio); } +static void +endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, + int uptodate) +{ + struct extent_state *cached = NULL; + u64 end = start + len - 1; + + if (uptodate && tree->track_uptodate) + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); +} + /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; + u64 offset = 0; u64 start; u64 end; + u64 len; + u64 extent_start = 0; + u64 extent_len = 0; int mirror; int ret; @@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; - struct extent_state *cached = NULL; - struct extent_state *state; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " @@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; + len = bvec->bv_len; if 
(++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - cache_state(state, &cached); - } - spin_unlock(&tree->lock); - mirror = io_bio->mirror_num; - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end, - state, mirror); + if (likely(uptodate && tree->ops && + tree->ops->readpage_end_io_hook)) { + ret = tree->ops->readpage_end_io_hook(io_bio, offset, + page, start, end, + mirror); if (ret) uptodate = 0; else clean_io_failure(start, page); } - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { + if (likely(uptodate)) + goto readpage_ok; + + if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !err && test_bit(BIO_UPTODATE, &bio->bi_flags)) uptodate = 1; - } else if (!uptodate) { + } else { /* * The generic bio_readpage_error handles errors the * following way: If possible, new read requests are @@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. */ - ret = bio_readpage_error(bio, page, start, end, mirror, NULL); + ret = bio_readpage_error(bio, offset, page, start, end, + mirror); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } } - - if (uptodate && tree->track_uptodate) { - set_extent_uptodate(tree, start, end, &cached, - GFP_ATOMIC); - } - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - - if (uptodate) { +readpage_ok: + if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; @@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err) SetPageError(page); } unlock_page(page); + offset += len; + + if (unlikely(!uptodate)) { + if (extent_len) { + endio_readpage_release_extent(tree, + extent_start, + extent_len, 1); + extent_start = 0; + extent_len = 0; + } + endio_readpage_release_extent(tree, start, + end - start + 1, 0); + } else if (!extent_len) { + extent_start = start; + extent_len = end + 1 - start; + } else if (extent_start + extent_len == start) { + extent_len += end + 1 - start; + } else { + endio_readpage_release_extent(tree, extent_start, + extent_len, uptodate); + extent_start = start; + extent_len = end + 1 - start; + } } while (bvec <= bvec_end); + if (extent_len) + endio_readpage_release_extent(tree, extent_start, extent_len, + uptodate); + if (io_bio->end_io) + io_bio->end_io(io_bio, err); bio_put(bio); } @@ -2586,6 +2536,7 @@ struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) { + struct btrfs_io_bio *btrfs_bio; struct bio *bio; bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); @@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; } return bio; } @@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) /* this also allocates from the btrfs_bioset */ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, 
unsigned int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + struct btrfs_io_bio *btrfs_bio; + struct bio *bio; + + bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + if (bio) { + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; + } + return bio; } @@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page) } } +static struct extent_map * +__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, + u64 start, u64 len, get_extent_t *get_extent, + struct extent_map **em_cached) +{ + struct extent_map *em; + + if (em_cached && *em_cached) { + em = *em_cached; + if (em->in_tree && start >= em->start && + start < extent_map_end(em)) { + atomic_inc(&em->refs); + return em; + } + + free_extent_map(em); + *em_cached = NULL; + } + + em = get_extent(inode, page, pg_offset, start, len, 0); + if (em_cached && !IS_ERR_OR_NULL(em)) { + BUG_ON(*em_cached); + atomic_inc(&em->refs); + *em_cached = em; + } + return em; +} /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io * handlers) * XXX JDM: This needs looking at to ensure proper page locking */ -static int __extent_read_full_page(struct extent_io_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) +static int __do_readpage(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - struct btrfs_ordered_extent *ordered; int ret; int nr = 0; + int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED; size_t pg_offset = 0; size_t iosize; size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; - unsigned long this_bio_flag = 0; + unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; set_page_extent_mapped(page); + end = page_end; if (!PageUptodate(page)) { if (cleancache_get_page(page) == 0) { BUG_ON(blocksize != PAGE_SIZE); + unlock_extent(tree, start, end); goto out; } } - end = page_end; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_extent(inode, start); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } - if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); @@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree, kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); - unlock_extent_cached(tree, cur, cur + iosize - 1, - &cached, GFP_NOFS); + if (!parent_locked) + unlock_extent_cached(tree, cur, + cur + iosize - 1, + &cached, GFP_NOFS); break; } - em = get_extent(inode, page, pg_offset, cur, - end - cur + 1, 0); + em = __get_extent_map(inode, page, pg_offset, cur, + end - cur + 1, get_extent, em_cached); if (IS_ERR_OR_NULL(em)) { SetPageError(page); - unlock_extent(tree, cur, end); + if (!parent_locked) + unlock_extent(tree, cur, end); break; } extent_offset = cur - em->start; @@ -2833,7 +2820,7 @@ 
static int __extent_read_full_page(struct extent_io_tree *tree, BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - this_bio_flag = EXTENT_BIO_COMPRESSED; + this_bio_flag |= EXTENT_BIO_COMPRESSED; extent_set_compress_type(&this_bio_flag, em->compress_type); } @@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, */ if (block_start == EXTENT_MAP_INLINE) { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, *bio_flags = this_bio_flag; } else { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); } cur = cur + iosize; pg_offset += iosize; @@ -2919,6 +2909,104 @@ out: return 0; } +static inline void __do_contiguous_readpages(struct extent_io_tree *tree, + struct page *pages[], int nr_pages, + u64 start, u64 end, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode; + struct btrfs_ordered_extent *ordered; + int index; + + inode = pages[0]->mapping->host; + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_range(inode, start, + end - start + 1); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + for (index = 0; index < nr_pages; index++) { + __do_readpage(tree, pages[index], get_extent, em_cached, bio, + mirror_num, bio_flags, rw); + page_cache_release(pages[index]); + } +} + +static void __extent_readpages(struct extent_io_tree *tree, + struct page *pages[], + int nr_pages, get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + u64 start = 0; + u64 end = 0; + u64 page_start; + int index; + int first_index = 0; + + for (index = 0; index < nr_pages; index++) { + page_start = page_offset(pages[index]); + if (!end) { + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } else if (end + 1 == page_start) { + end += PAGE_CACHE_SIZE; + } else { + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, + bio, mirror_num, bio_flags, + rw); + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } + } + + if (end) + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, bio, + mirror_num, bio_flags, rw); +} + +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode = page->mapping->host; + struct btrfs_ordered_extent *ordered; + u64 start = page_offset(page); + u64 end = start + PAGE_CACHE_SIZE - 1; + int ret; + + while (1) { + lock_extent(tree, start, end); + ordered = 
btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, + bio_flags, rw); + return ret; +} + int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num) { @@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num) +{ + struct bio *bio = NULL; + unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED; + int ret; + + ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, + &bio_flags, READ); + if (bio) + ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + return ret; +} + static noinline void update_nr_written(struct page *page, struct writeback_control *wbc, unsigned long nr_written) @@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (!PageWriteback(page)) { printk(KERN_ERR "btrfs warning page %lu not " "writeback, cur %llu end %llu\n", - page->index, (unsigned long long)cur, - (unsigned long long)end); + page->index, cur, end); } ret = submit_extent_page(write_flags, tree, page, @@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree, unsigned long bio_flags = 0; struct page *pagepool[16]; struct page *page; - int i = 0; + struct extent_map *em_cached = NULL; int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); nr = 0; } - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + if (nr) + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); + + if (em_cached) + free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) @@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(&eb->io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || + test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb->start, eb->len); + index = start_idx + num_pages; + if (start_idx >= index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page && mapped) { + spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. 
So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* + * We need to make sure we haven't be attached + * to a new eb. + */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } + spin_unlock(&page->mapping->private_lock); + + } + if (page) { + /* One for when we alloced the page */ + page_cache_release(page); + } + } while (index != start_idx); +} + +/* + * Helper for releasing the extent buffer. + */ +static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) +{ + btrfs_release_extent_buffer_page(eb, 0); + __free_extent_buffer(eb); +} + static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); + new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); if (new == NULL) return NULL; for (i = 0; i < num_pages; i++) { - p = alloc_page(GFP_ATOMIC); - BUG_ON(!p); + p = alloc_page(GFP_NOFS); + if (!p) { + btrfs_release_extent_buffer(new); + return NULL; + } attach_extent_buffer_page(new, p); WARN_ON(PageDirty(p)); SetPageUptodate(p); @@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!eb) return NULL; for (i = 0; i < num_pages; i++) { - eb->pages[i] = alloc_page(GFP_ATOMIC); + eb->pages[i] = alloc_page(GFP_NOFS); if (!eb->pages[i]) goto err; } @@ -4231,76 +4403,6 @@ err: return NULL; } -static int extent_buffer_under_io(struct extent_buffer *eb) -{ - return (atomic_read(&eb->io_pages) || - test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || - test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); -} - -/* - * Helper for releasing extent buffer page. - */ -static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, - unsigned long start_idx) -{ - unsigned long index; - unsigned long num_pages; - struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); - - BUG_ON(extent_buffer_under_io(eb)); - - num_pages = num_extent_pages(eb->start, eb->len); - index = start_idx + num_pages; - if (start_idx >= index) - return; - - do { - index--; - page = extent_buffer_page(eb, index); - if (page && mapped) { - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. 
- */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ - page_cache_release(page); - } - } while (index != start_idx); -} - -/* - * Helper for releasing the extent buffer. - */ -static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) -{ - btrfs_release_extent_buffer_page(eb, 0); - __free_extent_buffer(eb); -} - static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; @@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, if (start + min_len > eb->len) { WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", (unsigned long long)eb->start, - eb->len, start, min_len); + "wanted %lu %lu\n", + eb->start, eb->len, start, min_len); return -EINVAL; } @@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(dst, i); @@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; @@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); |
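Note on the read-path restructuring above: extent_readpages() no longer issues one __extent_read_full_page() call per page. It now batches the page pool into physically contiguous ranges and hands each range to __do_contiguous_readpages(), which locks and waits on the ordered extent for the whole range once instead of per page. The following userspace sketch is a minimal illustration of that coalescing pattern only; it is plain C with hypothetical names (flush_range, offsets), not kernel code.

/*
 * Standalone sketch of the range-coalescing idea used by the new
 * __extent_readpages(): walk page offsets in order, extend the current
 * range while pages are contiguous, and flush a batch at each gap.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

static void flush_range(uint64_t start, uint64_t end)
{
	/* stands in for __do_contiguous_readpages(): handle [start, end] */
	printf("read contiguous range [%llu, %llu]\n",
	       (unsigned long long)start, (unsigned long long)end);
}

int main(void)
{
	/* byte offsets of the pages to read; note the gap before 32768 */
	uint64_t offsets[] = { 0, 4096, 8192, 32768, 36864 };
	uint64_t start = 0, end = 0;
	size_t i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		uint64_t page_start = offsets[i];

		if (!end) {			/* first page: open a range */
			start = page_start;
			end = start + PAGE_SIZE - 1;
		} else if (end + 1 == page_start) {
			end += PAGE_SIZE;	/* contiguous: extend range */
		} else {			/* gap: flush, then restart */
			flush_range(start, end);
			start = page_start;
			end = start + PAGE_SIZE - 1;
		}
	}
	if (end)
		flush_range(start, end);	/* flush the trailing range */
	return 0;
}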