aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c3717
1 files changed, 3717 insertions, 0 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
new file mode 100644
index 00000000000..e086d407f1f
--- /dev/null
+++ b/fs/btrfs/extent_io.c
@@ -0,0 +1,3717 @@
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "extent_io.h"
+#include "extent_map.h"
+#include "compat.h"
+#include "ctree.h"
+#include "btrfs_inode.h"
+
+/* temporary define until extent_map moves out of btrfs */
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+ unsigned long extra_flags,
+ void (*ctor)(void *, struct kmem_cache *,
+ unsigned long));
+
+static struct kmem_cache *extent_state_cache;
+static struct kmem_cache *extent_buffer_cache;
+
+static LIST_HEAD(buffers);
+static LIST_HEAD(states);
+
+#define LEAK_DEBUG 0
+#ifdef LEAK_DEBUG
+static DEFINE_SPINLOCK(leak_lock);
+#endif
+
+#define BUFFER_LRU_MAX 64
+
+struct tree_entry {
+ u64 start;
+ u64 end;
+ struct rb_node rb_node;
+};
+
+struct extent_page_data {
+ struct bio *bio;
+ struct extent_io_tree *tree;
+ get_extent_t *get_extent;
+
+ /* tells writepage not to lock the state bits for this range
+ * it still does the unlocking
+ */
+ int extent_locked;
+};
+
+int __init extent_io_init(void)
+{
+ extent_state_cache = btrfs_cache_create("extent_state",
+ sizeof(struct extent_state), 0,
+ NULL);
+ if (!extent_state_cache)
+ return -ENOMEM;
+
+ extent_buffer_cache = btrfs_cache_create("extent_buffers",
+ sizeof(struct extent_buffer), 0,
+ NULL);
+ if (!extent_buffer_cache)
+ goto free_state_cache;
+ return 0;
+
+free_state_cache:
+ kmem_cache_destroy(extent_state_cache);
+ return -ENOMEM;
+}
+
+void extent_io_exit(void)
+{
+ struct extent_state *state;
+ struct extent_buffer *eb;
+
+ while (!list_empty(&states)) {
+ state = list_entry(states.next, struct extent_state, leak_list);
+ printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+ "state %lu in tree %p refs %d\n",
+ (unsigned long long)state->start,
+ (unsigned long long)state->end,
+ state->state, state->tree, atomic_read(&state->refs));
+ list_del(&state->leak_list);
+ kmem_cache_free(extent_state_cache, state);
+
+ }
+
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+ printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+ "refs %d\n", (unsigned long long)eb->start,
+ eb->len, atomic_read(&eb->refs));
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
+ if (extent_state_cache)
+ kmem_cache_destroy(extent_state_cache);
+ if (extent_buffer_cache)
+ kmem_cache_destroy(extent_buffer_cache);
+}
+
+void extent_io_tree_init(struct extent_io_tree *tree,
+ struct address_space *mapping, gfp_t mask)
+{
+ tree->state.rb_node = NULL;
+ tree->buffer.rb_node = NULL;
+ tree->ops = NULL;
+ tree->dirty_bytes = 0;
+ spin_lock_init(&tree->lock);
+ spin_lock_init(&tree->buffer_lock);
+ tree->mapping = mapping;
+}
+
+static struct extent_state *alloc_extent_state(gfp_t mask)
+{
+ struct extent_state *state;
+#ifdef LEAK_DEBUG
+ unsigned long flags;
+#endif
+
+ state = kmem_cache_alloc(extent_state_cache, mask);
+ if (!state)
+ return state;
+ state->state = 0;
+ state->private = 0;
+ state->tree = NULL;
+#ifdef LEAK_DEBUG
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(&state->leak_list, &states);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+ atomic_set(&state->refs, 1);
+ init_waitqueue_head(&state->wq);
+ return state;
+}
+
+static void free_extent_state(struct extent_state *state)
+{
+ if (!state)
+ return;
+ if (atomic_dec_and_test(&state->refs)) {
+#ifdef LEAK_DEBUG
+ unsigned long flags;
+#endif
+ WARN_ON(state->tree);
+#ifdef LEAK_DEBUG
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&state->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+ kmem_cache_free(extent_state_cache, state);
+ }
+}
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct tree_entry *entry;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct tree_entry, rb_node);
+
+ if (offset < entry->start)
+ p = &(*p)->rb_left;
+ else if (offset > entry->end)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ entry = rb_entry(node, struct tree_entry, rb_node);
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
+ struct rb_node **prev_ret,
+ struct rb_node **next_ret)
+{
+ struct rb_root *root = &tree->state;
+ struct rb_node *n = root->rb_node;
+ struct rb_node *prev = NULL;
+ struct rb_node *orig_prev = NULL;
+ struct tree_entry *entry;
+ struct tree_entry *prev_entry = NULL;
+
+ while (n) {
+ entry = rb_entry(n, struct tree_entry, rb_node);
+ prev = n;
+ prev_entry = entry;
+
+ if (offset < entry->start)
+ n = n->rb_left;
+ else if (offset > entry->end)
+ n = n->rb_right;
+ else
+ return n;
+ }
+
+ if (prev_ret) {
+ orig_prev = prev;
+ while (prev && offset > prev_entry->end) {
+ prev = rb_next(prev);
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ }
+ *prev_ret = prev;
+ prev = orig_prev;
+ }
+
+ if (next_ret) {
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ while (prev && offset < prev_entry->start) {
+ prev = rb_prev(prev);
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ }
+ *next_ret = prev;
+ }
+ return NULL;
+}
+
+static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+ u64 offset)
+{
+ struct rb_node *prev = NULL;
+ struct rb_node *ret;
+
+ ret = __etree_search(tree, offset, &prev, NULL);
+ if (!ret)
+ return prev;
+ return ret;
+}
+
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+ u64 offset, struct rb_node *node)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct extent_buffer *eb;
+
+ while (*p) {
+ parent = *p;
+ eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+ if (offset < eb->start)
+ p = &(*p)->rb_left;
+ else if (offset > eb->start)
+ p = &(*p)->rb_right;
+ else
+ return eb;
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+ u64 offset)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node *n = root->rb_node;
+ struct extent_buffer *eb;
+
+ while (n) {
+ eb = rb_entry(n, struct extent_buffer, rb_node);
+ if (offset < eb->start)
+ n = n->rb_left;
+ else if (offset > eb->start)
+ n = n->rb_right;
+ else
+ return eb;
+ }
+ return NULL;
+}
+
+/*
+ * utility function to look for merge candidates inside a given range.
+ * Any extents with matching state are merged together into a single
+ * extent in the tree. Extents with EXTENT_IO in their state field
+ * are not merged because the end_io handlers need to be able to do
+ * operations on them without sleeping (or doing allocations/splits).
+ *
+ * This should be called with the tree lock held.
+ */
+static int merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *other;
+ struct rb_node *other_node;
+
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+ return 0;
+
+ other_node = rb_prev(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->end == state->start - 1 &&
+ other->state == state->state) {
+ state->start = other->start;
+ other->tree = NULL;
+ rb_erase(&other->rb_node, &tree->state);
+ free_extent_state(other);
+ }
+ }
+ other_node = rb_next(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->start == state->end + 1 &&
+ other->state == state->state) {
+ other->start = state->start;
+ state->tree = NULL;
+ rb_erase(&state->rb_node, &tree->state);
+ free_extent_state(state);
+ }
+ }
+ return 0;
+}
+
+static void set_state_cb(struct extent_io_tree *tree,
+ struct extent_state *state,
+ unsigned long bits)
+{
+ if (tree->ops && tree->ops->set_bit_hook) {
+ tree->ops->set_bit_hook(tree->mapping->host, state->start,
+ state->end, state->state, bits);
+ }
+}
+
+static void clear_state_cb(struct extent_io_tree *tree,
+ struct extent_state *state,
+ unsigned long bits)
+{
+ if (tree->ops && tree->ops->clear_bit_hook) {
+ tree->ops->clear_bit_hook(tree->mapping->host, state->start,
+ state->end, state->state, bits);
+ }
+}
+
+/*
+ * insert an extent_state struct into the tree. 'bits' are set on the
+ * struct before it is inserted.
+ *
+ * This may return -EEXIST if the extent is already there, in which case the
+ * state struct is freed.
+ *
+ * The tree lock is not taken internally. This is a utility function and
+ * probably isn't what you want to call (see set/clear_extent_bit).
+ */
+static int insert_state(struct extent_io_tree *tree,
+ struct extent_state *state, u64 start, u64 end,
+ int bits)
+{
+ struct rb_node *node;
+
+ if (end < start) {
+ printk(KERN_ERR "btrfs end < start %llu %llu\n",
+ (unsigned long long)end,
+ (unsigned long long)start);
+ WARN_ON(1);
+ }
+ if (bits & EXTENT_DIRTY)
+ tree->dirty_bytes += end - start + 1;
+ set_state_cb(tree, state, bits);
+ state->state |= bits;
+ state->start = start;
+ state->end = end;
+ node = tree_insert(&tree->state, end, &state->rb_node);
+ if (node) {
+ struct extent_state *found;
+ found = rb_entry(node, struct extent_state, rb_node);
+ printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+ "%llu %llu\n", (unsigned long long)found->start,
+ (unsigned long long)found->end,
+ (unsigned long long)start, (unsigned long long)end);
+ free_extent_state(state);
+ return -EEXIST;
+ }
+ state->tree = tree;
+ merge_state(tree, state);
+ return 0;
+}
+
+/*
+ * split a given extent state struct in two, inserting the preallocated
+ * struct 'prealloc' as the newly created second half. 'split' indicates an
+ * offset inside 'orig' where it should be split.
+ *
+ * Before calling,
+ * the tree has 'orig' at [orig->start, orig->end]. After calling, there
+ * are two extent state structs in the tree:
+ * prealloc: [orig->start, split - 1]
+ * orig: [ split, orig->end ]
+ *
+ * The tree locks are not taken by this function. They need to be held
+ * by the caller.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+ struct extent_state *prealloc, u64 split)
+{
+ struct rb_node *node;
+ prealloc->start = orig->start;
+ prealloc->end = split - 1;
+ prealloc->state = orig->state;
+ orig->start = split;
+
+ node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+ if (node) {
+ struct extent_state *found;
+ found = rb_entry(node, struct extent_state, rb_node);
+ free_extent_state(prealloc);
+ return -EEXIST;
+ }
+ prealloc->tree = tree;
+ return 0;
+}
+
+/*
+ * utility function to clear some bits in an extent state struct.
+ * it will optionally wake up any one waiting on this state (wake == 1), or
+ * forcibly remove the state from the tree (delete == 1).
+ *
+ * If no bits are set on the state struct after clearing things, the
+ * struct is freed and removed from the tree
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+ struct extent_state *state, int bits, int wake,
+ int delete)
+{
+ int ret = state->state & bits;
+
+ if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+ u64 range = state->end - state->start + 1;
+ WARN_ON(range > tree->dirty_bytes);
+ tree->dirty_bytes -= range;
+ }
+ clear_state_cb(tree, state, bits);
+ state->state &= ~bits;
+ if (wake)
+ wake_up(&state->wq);
+ if (delete || state->state == 0) {
+ if (state->tree) {
+ clear_state_cb(tree, state, state->state);
+ rb_erase(&state->rb_node, &tree->state);
+ state->tree = NULL;
+ free_extent_state(state);
+ } else {
+ WARN_ON(1);
+ }
+ } else {
+ merge_state(tree, state);
+ }
+ return ret;
+}
+
+/*
+ * clear some bits on a range in the tree. This may require splitting
+ * or inserting elements in the tree, so the gfp mask is used to
+ * indicate which allocations or sleeping are allowed.
+ *
+ * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
+ * the given range from the tree regardless of state (ie for truncate).
+ *
+ * the range [start, end] is inclusive.
+ *
+ * This takes the tree lock, and returns < 0 on error, > 0 if any of the
+ * bits were already set, or zero if none of the bits were already set.
+ */
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int wake, int delete, gfp_t mask)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct rb_node *node;
+ int err;
+ int set = 0;
+
+again:
+ if (!prealloc && (mask & __GFP_WAIT)) {
+ prealloc = alloc_extent_state(mask);
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ spin_lock(&tree->lock);
+ /*
+ * this search will find the extents that end after
+ * our range starts
+ */
+ node = tree_search(tree, start);
+ if (!node)
+ goto out;
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->start > end)
+ goto out;
+ WARN_ON(state->end < start);
+
+ /*
+ * | ---- desired range ---- |
+ * | state | or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip
+ * bits on second half.
+ *
+ * If the extent we found extends past our range, we
+ * just split and search again. It'll get split again
+ * the next time though.
+ *
+ * If the extent we found is inside our range, we clear
+ * the desired bit on it.
+ */
+
+ if (state->start < start) {
+ if (!prealloc)
+ prealloc = alloc_extent_state(GFP_ATOMIC);
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits,
+ wake, delete);
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and clear the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ if (!prealloc)
+ prealloc = alloc_extent_state(GFP_ATOMIC);
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ if (wake)
+ wake_up(&state->wq);
+ set |= clear_state_bit(tree, prealloc, bits,
+ wake, delete);
+ prealloc = NULL;
+ goto out;
+ }
+
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits, wake, delete);
+ goto search_again;
+
+out:
+ spin_unlock(&tree->lock);
+ if (prealloc)
+ free_extent_state(prealloc);
+
+ return set;
+
+search_again:
+ if (start > end)
+ goto out;
+ spin_unlock(&tree->lock);
+ if (mask & __GFP_WAIT)
+ cond_resched();
+ goto again;
+}
+
+static int wait_on_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+ __releases(tree->lock)
+ __acquires(tree->lock)
+{
+ DEFINE_WAIT(wait);
+ prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&tree->lock);
+ schedule();
+ spin_lock(&tree->lock);
+ finish_wait(&state->wq, &wait);
+ return 0;
+}
+
+/*
+ * waits for one or more bits to clear on a range in the state tree.
+ * The range [start, end] is inclusive.
+ * The tree lock is taken by this function
+ */
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+ struct extent_state *state;
+ struct rb_node *node;
+
+ spin_lock(&tree->lock);
+again:
+ while (1) {
+ /*
+ * this search will find all the extents that end after
+ * our range starts
+ */
+ node = tree_search(tree, start);
+ if (!node)
+ break;
+
+ state = rb_entry(node, struct extent_state, rb_node);
+
+ if (state->start > end)
+ goto out;
+
+ if (state->state & bits) {
+ start = state->start;
+ atomic_inc(&state->refs);
+ wait_on_state(tree, state);
+ free_extent_state(state);
+ goto again;
+ }
+ start = state->end + 1;
+
+ if (start > end)
+ break;
+
+ if (need_resched()) {
+ spin_unlock(&tree->lock);
+ cond_resched();
+ spin_lock(&tree->lock);
+ }
+ }
+out:
+ spin_unlock(&tree->lock);
+ return 0;
+}
+
+static void set_state_bits(struct extent_io_tree *tree,
+ struct extent_state *state,
+ int bits)
+{
+ if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
+ u64 range = state->end - state->start + 1;
+ tree->dirty_bytes += range;
+ }
+ set_state_cb(tree, state, bits);
+ state->state |= bits;
+}
+
+/*
+ * set some bits on a range in the tree. This may require allocations
+ * or sleeping, so the gfp mask is used to indicate what is allowed.
+ *
+ * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
+ * range already has the desired bits set. The start of the existing
+ * range is returned in failed_start in this case.
+ *
+ * [start, end] is inclusive
+ * This takes the tree lock.
+ */
+static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int exclusive, u64 *failed_start,
+ gfp_t mask)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct rb_node *node;
+ int err = 0;
+ int set;
+ u64 last_start;
+ u64 last_end;
+again:
+ if (!prealloc && (mask & __GFP_WAIT)) {
+ prealloc = alloc_extent_state(mask);
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ spin_lock(&tree->lock);
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ if (!node) {
+ err = insert_state(tree, prealloc, start, end, bits);
+ prealloc = NULL;
+ BUG_ON(err == -EEXIST);
+ goto out;
+ }
+
+ state = rb_entry(node, struct extent_state, rb_node);
+ last_start = state->start;
+ last_end = state->end;
+
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ *
+ * Just lock what we found and keep going
+ */
+ if (state->start == start && state->end <= end) {
+ set = state->state & bits;
+ if (set && exclusive) {
+ *failed_start = state->start;
+ err = -EEXIST;
+ goto out;
+ }
+ set_state_bits(tree, state, bits);
+ start = state->end + 1;
+ merge_state(tree, state);
+ goto search_again;
+ }
+
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip bits on
+ * second half.
+ *
+ * If the extent we found extends past our
+ * range, we just split and search again. It'll get split
+ * again the next time though.
+ *
+ * If the extent we found is inside our range, we set the
+ * desired bit on it.
+ */
+ if (state->start < start) {
+ set = state->state & bits;
+ if (exclusive && set) {
+ *failed_start = start;
+ err = -EEXIST;
+ goto out;
+ }
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ set_state_bits(tree, state, bits);
+ start = state->end + 1;
+ merge_state(tree, state);
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state | or | state |
+ *
+ * There's a hole, we need to insert something in it and
+ * ignore the extent we found.
+ */
+ if (state->start > start) {
+ u64 this_end;
+ if (end < last_start)
+ this_end = end;
+ else
+ this_end = last_start - 1;
+ err = insert_state(tree, prealloc, start, this_end,
+ bits);
+ prealloc = NULL;
+ BUG_ON(err == -EEXIST);
+ if (err)
+ goto out;
+ start = this_end + 1;
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and set the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ set = state->state & bits;
+ if (exclusive && set) {
+ *failed_start = start;
+ err = -EEXIST;
+ goto out;
+ }
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ set_state_bits(tree, prealloc, bits);
+ merge_state(tree, prealloc);
+ prealloc = NULL;
+ goto out;
+ }
+
+ goto search_again;
+
+out:
+ spin_unlock(&tree->lock);
+ if (prealloc)
+ free_extent_state(prealloc);
+
+ return err;
+
+search_again:
+ if (start > end)
+ goto out;
+ spin_unlock(&tree->lock);
+ if (mask & __GFP_WAIT)
+ cond_resched();
+ goto again;
+}
+
+/* wrappers around set/clear extent bit */
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
+ mask);
+}
+
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, bits, 0, NULL,
+ mask);
+}
+
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+}
+
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_DIRTY,
+ 0, NULL, mask);
+}
+
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end,
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
+}
+
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
+ mask);
+}
+
+static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+}
+
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+ mask);
+}
+
+static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
+}
+
+static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
+ 0, NULL, mask);
+}
+
+static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+}
+
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
+}
+
+/*
+ * either insert or lock state struct between start and end use mask to tell
+ * us if waiting is desired.
+ */
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+ int err;
+ u64 failed_start;
+ while (1) {
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+ &failed_start, mask);
+ if (err == -EEXIST && (mask & __GFP_WAIT)) {
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+ start = failed_start;
+ } else {
+ break;
+ }
+ WARN_ON(start > end);
+ }
+ return err;
+}
+
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ int err;
+ u64 failed_start;
+
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+ &failed_start, mask);
+ if (err == -EEXIST) {
+ if (failed_start > start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, 1, 0, mask);
+ return 0;
+ }
+ return 1;
+}
+
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+}
+
+/*
+ * helper function to set pages and extents in the tree dirty
+ */
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(tree->mapping, index);
+ BUG_ON(!page);
+ __set_page_dirty_nobuffers(page);
+ page_cache_release(page);
+ index++;
+ }
+ set_extent_dirty(tree, start, end, GFP_NOFS);
+ return 0;
+}
+
+/*
+ * helper function to set both pages and extents in the tree writeback
+ */
+static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(tree->mapping, index);
+ BUG_ON(!page);
+ set_page_writeback(page);
+ page_cache_release(page);
+ index++;
+ }
+ set_extent_writeback(tree, start, end, GFP_NOFS);
+ return 0;
+}
+
+/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+ int ret = 1;
+
+ spin_lock(&tree->lock);
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ if (!node)
+ goto out;
+
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && (state->state & bits)) {
+ *start_ret = state->start;
+ *end_ret = state->end;
+ ret = 0;
+ break;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ spin_unlock(&tree->lock);
+ return ret;
+}
+
+/* find the first state struct with 'bits' set after 'start', and
+ * return it. tree->lock must be held. NULL will returned if
+ * nothing was found after 'start'
+ */
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+ u64 start, int bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ if (!node)
+ goto out;
+
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && (state->state & bits))
+ return state;
+
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ return NULL;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
+ u64 *start, u64 *end, u64 max_bytes)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+ u64 cur_start = *start;
+ u64 found = 0;
+ u64 total_bytes = 0;
+
+ spin_lock(&tree->lock);
+
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, cur_start);
+ if (!node) {
+ if (!found)
+ *end = (u64)-1;
+ goto out;
+ }
+
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (found && (state->start != cur_start ||
+ (state->state & EXTENT_BOUNDARY))) {
+ goto out;
+ }
+ if (!(state->state & EXTENT_DELALLOC)) {
+ if (!found)
+ *end = state->end;
+ goto out;
+ }
+ if (!found)
+ *start = state->start;
+ found++;
+ *end = state->end;
+ cur_start = state->end + 1;
+ node = rb_next(node);
+ if (!node)
+ break;
+ total_bytes += state->end - state->start + 1;
+ if (total_bytes >= max_bytes)
+ break;
+ }
+out:
+ spin_unlock(&tree->lock);
+ return found;
+}
+
+static noinline int __unlock_for_delalloc(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end)
+{
+ int ret;
+ struct page *pages[16];
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = end_index - index + 1;
+ int i;
+
+ if (index == locked_page->index && end_index == index)
+ return 0;
+
+ while (nr_pages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long, nr_pages,
+ ARRAY_SIZE(pages)), pages);
+ for (i = 0; i < ret; i++) {
+ if (pages[i] != locked_page)
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ }
+ nr_pages -= ret;
+ index += ret;
+ cond_resched();
+ }
+ return 0;
+}
+
+static noinline int lock_delalloc_pages(struct inode *inode,
+ struct page *locked_page,
+ u64 delalloc_start,
+ u64 delalloc_end)
+{
+ unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+ unsigned long start_index = index;
+ unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+ unsigned long pages_locked = 0;
+ struct page *pages[16];
+ unsigned long nrpages;
+ int ret;
+ int i;
+
+ /* the caller is responsible for locking the start index */
+ if (index == locked_page->index && index == end_index)
+ return 0;
+
+ /* skip the page at the start index */
+ nrpages = end_index - index + 1;
+ while (nrpages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long,
+ nrpages, ARRAY_SIZE(pages)), pages);
+ if (ret == 0) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ /* now we have an array of pages, lock them all */
+ for (i = 0; i < ret; i++) {
+ /*
+ * the caller is taking responsibility for
+ * locked_page
+ */
+ if (pages[i] != locked_page) {
+ lock_page(pages[i]);
+ if (!PageDirty(pages[i]) ||
+ pages[i]->mapping != inode->i_mapping) {
+ ret = -EAGAIN;
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ goto done;
+ }
+ }
+ page_cache_release(pages[i]);
+ pages_locked++;
+ }
+ nrpages -= ret;
+ index += ret;
+ cond_resched();
+ }
+ ret = 0;
+done:
+ if (ret && pages_locked) {
+ __unlock_for_delalloc(inode, locked_page,
+ delalloc_start,
+ ((u64)(start_index + pages_locked - 1)) <<
+ PAGE_CACHE_SHIFT);
+ }
+ return ret;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page,
+ u64 *start, u64 *end,
+ u64 max_bytes)
+{
+ u64 delalloc_start;
+ u64 delalloc_end;
+ u64 found;
+ int ret;
+ int loops = 0;
+
+again:
+ /* step one, find a bunch of delalloc bytes sta