author    Chris Mason <chris.mason@oracle.com>    2007-08-27 16:49:44 -0400
committer David Woodhouse <dwmw2@hera.kernel.org> 2007-08-27 16:49:44 -0400
commit    a52d9a8033c454cd9b4697cfafb467509fc1693f (patch)
tree      83d8bbf2df29598ea9c60498fadf5990262a11c8 /fs
parent    83df7c1d8b885e216c68c3f0469bec9b93666b74 (diff)
Btrfs: Extent based page cache code. This uses an rbtree of extents and tests
instead of buffer heads.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/Makefile       |    3
-rw-r--r--  fs/btrfs/btrfs_inode.h  |    3
-rw-r--r--  fs/btrfs/ctree.h        |    7
-rw-r--r--  fs/btrfs/disk-io.h      |    1
-rw-r--r--  fs/btrfs/extent_map.c   | 1656
-rw-r--r--  fs/btrfs/extent_map.h   |   89
-rw-r--r--  fs/btrfs/file.c         |  295
-rw-r--r--  fs/btrfs/inode.c        |  816
-rw-r--r--  fs/btrfs/super.c        |    2
9 files changed, 2203 insertions, 669 deletions
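
A minimal sketch, not part of this patch, of how a caller might drive the
range API added in fs/btrfs/extent_map.c below; the tree pointer, byte
range, and GFP flags are illustrative assumptions, and the prototypes are
assumed to live in extent_map.h, which this patch also adds:

	#include "extent_map.h"

	/* Illustrative only: lock an inclusive byte range, mark it dirty,
	 * then unlock it, using helpers introduced by this patch. */
	static void mark_range_dirty_sketch(struct extent_map_tree *tree,
					    u64 start, u64 end)
	{
		/* sleeps until any conflicting EXTENT_LOCKED range clears */
		lock_extent(tree, start, end, GFP_NOFS);

		/* record that [start, end] (inclusive) holds dirty data */
		set_extent_dirty(tree, start, end, GFP_NOFS);

		/* clear the lock bit and wake anyone waiting on the range */
		unlock_extent(tree, start, end, GFP_NOFS);
	}
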
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9321438c5ee..ea7a2287035 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,8 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
- transaction.o bit-radix.o inode.o file.o tree-defrag.o
+ transaction.o bit-radix.o inode.o file.o tree-defrag.o \
+ extent_map.o
#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f446f161f73..d1d5af471c3 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,11 +19,14 @@
#ifndef __BTRFS_I__
#define __BTRFS_I__
+#include "extent_map.h"
+
/* in memory btrfs inode */
struct btrfs_inode {
struct btrfs_root *root;
struct btrfs_block_group_cache *block_group;
struct btrfs_key location;
+ struct extent_map_tree extent_tree;
struct inode vfs_inode;
/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c7f5161271d..947f061ed11 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1176,7 +1176,14 @@ int btrfs_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to);
int btrfs_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *result, int create);
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+ size_t page_offset, u64 start, u64 end,
+ int create);
+int btrfs_update_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode);
/* file.c */
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end);
extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 81fd18cbd82..5261733b873 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -77,7 +77,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
u64 num_blocks);
int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
u64 logical);
-int btrfs_releasepage(struct page *page, gfp_t flags);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct buffer_head *bh);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
new file mode 100644
index 00000000000..d378edf0964
--- /dev/null
+++ b/fs/btrfs/extent_map.c
@@ -0,0 +1,1656 @@
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include "extent_map.h"
+
+static struct kmem_cache *extent_map_cache;
+static struct kmem_cache *extent_state_cache;
+
+struct tree_entry {
+ u64 start;
+ u64 end;
+ int in_tree;
+ struct rb_node rb_node;
+};
+
+/* bits for the extent state */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4)
+#define EXTENT_DELALLOC (1 << 5)
+
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+static LIST_HEAD(all_states);
+spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
+
+void __init extent_map_init(void)
+{
+ extent_map_cache = kmem_cache_create("extent_map",
+ sizeof(struct extent_map), 0,
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_DESTROY_BY_RCU,
+ NULL);
+ extent_state_cache = kmem_cache_create("extent_state",
+ sizeof(struct extent_state), 0,
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_DESTROY_BY_RCU,
+ NULL);
+}
+
+void __exit extent_map_exit(void)
+{
+ while(!list_empty(&all_states)) {
+ struct extent_state *state;
+ struct list_head *cur = all_states.next;
+ state = list_entry(cur, struct extent_state, list);
+ printk("found leaked state %Lu %Lu state %d in_tree %d\n",
+ state->start, state->end, state->state, state->in_tree);
+ list_del(&state->list);
+ kfree(state);
+ }
+ if (extent_map_cache)
+ kmem_cache_destroy(extent_map_cache);
+ if (extent_state_cache)
+ kmem_cache_destroy(extent_state_cache);
+}
+
+void extent_map_tree_init(struct extent_map_tree *tree,
+ struct address_space *mapping, gfp_t mask)
+{
+ tree->map.rb_node = NULL;
+ tree->state.rb_node = NULL;
+ rwlock_init(&tree->lock);
+ tree->mapping = mapping;
+}
+EXPORT_SYMBOL(extent_map_tree_init);
+
+struct extent_map *alloc_extent_map(gfp_t mask)
+{
+ struct extent_map *em;
+ em = kmem_cache_alloc(extent_map_cache, mask);
+ if (!em || IS_ERR(em))
+ return em;
+ em->in_tree = 0;
+ atomic_set(&em->refs, 1);
+ return em;
+}
+EXPORT_SYMBOL(alloc_extent_map);
+
+void free_extent_map(struct extent_map *em)
+{
+ if (atomic_dec_and_test(&em->refs)) {
+ WARN_ON(em->in_tree);
+ kmem_cache_free(extent_map_cache, em);
+ }
+}
+EXPORT_SYMBOL(free_extent_map);
+
+
+struct extent_state *alloc_extent_state(gfp_t mask)
+{
+ struct extent_state *state;
+ state = kmem_cache_alloc(extent_state_cache, mask);
+ if (!state || IS_ERR(state))
+ return state;
+ state->state = 0;
+ state->in_tree = 0;
+ atomic_set(&state->refs, 1);
+ init_waitqueue_head(&state->wq);
+ spin_lock_irq(&state_lock);
+ list_add(&state->list, &all_states);
+ spin_unlock_irq(&state_lock);
+ return state;
+}
+EXPORT_SYMBOL(alloc_extent_state);
+
+void free_extent_state(struct extent_state *state)
+{
+ if (atomic_dec_and_test(&state->refs)) {
+ WARN_ON(state->in_tree);
+ spin_lock_irq(&state_lock);
+ list_del_init(&state->list);
+ spin_unlock_irq(&state_lock);
+ kmem_cache_free(extent_state_cache, state);
+ }
+}
+EXPORT_SYMBOL(free_extent_state);
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+ struct rb_node *node)
+{
+ struct rb_node ** p = &root->rb_node;
+ struct rb_node * parent = NULL;
+ struct tree_entry *entry;
+
+ while(*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct tree_entry, rb_node);
+
+ if (offset < entry->start)
+ p = &(*p)->rb_left;
+ else if (offset > entry->end)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ entry = rb_entry(node, struct tree_entry, rb_node);
+ entry->in_tree = 1;
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
+ struct rb_node **prev_ret)
+{
+ struct rb_node * n = root->rb_node;
+ struct rb_node *prev = NULL;
+ struct tree_entry *entry;
+ struct tree_entry *prev_entry = NULL;
+
+ while(n) {
+ entry = rb_entry(n, struct tree_entry, rb_node);
+ prev = n;
+ prev_entry = entry;
+
+ if (offset < entry->start)
+ n = n->rb_left;
+ else if (offset > entry->end)
+ n = n->rb_right;
+ else
+ return n;
+ }
+ if (!prev_ret)
+ return NULL;
+ while(prev && offset > prev_entry->end) {
+ prev = rb_next(prev);
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ }
+ *prev_ret = prev;
+ return NULL;
+}
+
+static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
+{
+ struct rb_node *prev;
+ struct rb_node *ret;
+ ret = __tree_search(root, offset, &prev);
+ if (!ret)
+ return prev;
+ return ret;
+}
+
+static int tree_delete(struct rb_root *root, u64 offset)
+{
+ struct rb_node *node;
+ struct tree_entry *entry;
+
+ node = __tree_search(root, offset, NULL);
+ if (!node)
+ return -ENOENT;
+ entry = rb_entry(node, struct tree_entry, rb_node);
+ entry->in_tree = 0;
+ rb_erase(node, root);
+ return 0;
+}
+
+/*
+ * add_extent_mapping tries a simple backward merge with existing
+ * mappings. The extent_map struct passed in will be inserted into
+ * the tree directly (no copies made, just a reference taken).
+ */
+int add_extent_mapping(struct extent_map_tree *tree,
+ struct extent_map *em)
+{
+ int ret = 0;
+ struct extent_map *prev = NULL;
+ struct rb_node *rb;
+
+ write_lock_irq(&tree->lock);
+ rb = tree_insert(&tree->map, em->end, &em->rb_node);
+ if (rb) {
+ prev = rb_entry(rb, struct extent_map, rb_node);
+ printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
+ ret = -EEXIST;
+ goto out;
+ }
+ atomic_inc(&em->refs);
+ if (em->start != 0) {
+ rb = rb_prev(&em->rb_node);
+ if (rb)
+ prev = rb_entry(rb, struct extent_map, rb_node);
+ if (prev && prev->end + 1 == em->start &&
+ ((em->block_start == 0 && prev->block_start == 0) ||
+ (em->block_start == prev->block_end + 1))) {
+ em->start = prev->start;
+ em->block_start = prev->block_start;
+ rb_erase(&prev->rb_node, &tree->map);
+ prev->in_tree = 0;
+ free_extent_map(prev);
+ }
+ }
+out:
+ write_unlock_irq(&tree->lock);
+ return ret;
+}
+EXPORT_SYMBOL(add_extent_mapping);
+
+/*
+ * lookup_extent_mapping returns the first extent_map struct in the
+ * tree that intersects the [start, end] (inclusive) range. There may
+ * be additional objects in the tree that intersect, so check the object
+ * returned carefully to make sure you don't need additional lookups.
+ */
+struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 end)
+{
+ struct extent_map *em;
+ struct rb_node *rb_node;
+
+ read_lock_irq(&tree->lock);
+ rb_node = tree_search(&tree->map, start);
+ if (!rb_node) {
+ em = NULL;
+ goto out;
+ }
+ if (IS_ERR(rb_node)) {
+ em = ERR_PTR(PTR_ERR(rb_node));
+ goto out;
+ }
+ em = rb_entry(rb_node, struct extent_map, rb_node);
+ if (em->end < start || em->start > end) {
+ em = NULL;
+ goto out;
+ }
+ atomic_inc(&em->refs);
+out:
+ read_unlock_irq(&tree->lock);
+ return em;
+}
+EXPORT_SYMBOL(lookup_extent_mapping);
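
A small sketch, not part of the patch, of the add/lookup/free cycle for
struct extent_map using the functions above; it assumes the declarations
from extent_map.h added by this patch, and the range and block numbers are
invented for illustration:

	static int extent_map_sketch(struct extent_map_tree *tree)
	{
		struct extent_map *em;
		struct extent_map *found;
		int ret;

		em = alloc_extent_map(GFP_NOFS);
		if (!em)
			return -ENOMEM;
		em->start = 0;
		em->end = 4095;		/* ranges in this tree are inclusive */
		em->block_start = 4096;	/* example values only */
		em->block_end = 8191;

		ret = add_extent_mapping(tree, em); /* tree takes its own ref */
		if (ret)
			goto out;

		found = lookup_extent_mapping(tree, 0, 4095);
		if (found)
			free_extent_map(found);	/* drop the lookup reference */
	out:
		free_extent_map(em);		/* drop the allocation reference */
		return ret;
	}
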
+
+/*
+ * removes an extent_map struct from the tree. No reference counts are
+ * dropped, and no checks are done to see if the range is in use
+ */
+int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
+{
+ int ret;
+
+ write_lock_irq(&tree->lock);
+ ret = tree_delete(&tree->map, em->end);
+ write_unlock_irq(&tree->lock);
+ return ret;
+}
+EXPORT_SYMBOL(remove_extent_mapping);
+
+/*
+ * utility function to look for merge candidates inside a given range.
+ * Any extents with matching state are merged together into a single
+ * extent in the tree. Extents with EXTENT_IOBITS set in their state field
+ * are not merged because the end_io handlers need to be able to do
+ * operations on them without sleeping (or doing allocations/splits).
+ *
+ * This should be called with the tree lock held.
+ */
+static int merge_state(struct extent_map_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *other;
+ struct rb_node *other_node;
+
+ if (state->state & EXTENT_IOBITS)
+ return 0;
+
+ other_node = rb_prev(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->end == state->start - 1 &&
+ other->state == state->state) {
+ state->start = other->start;
+ other->in_tree = 0;
+ rb_erase(&other->rb_node, &tree->state);
+ free_extent_state(other);
+ }
+ }
+ other_node = rb_next(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->start == state->end + 1 &&
+ other->state == state->state) {
+ other->start = state->start;
+ state->in_tree = 0;
+ rb_erase(&state->rb_node, &tree->state);
+ free_extent_state(state);
+ }
+ }
+ return 0;
+}
+
+/*
+ * insert an extent_state struct into the tree. 'bits' are set on the
+ * struct before it is inserted.
+ *
+ * This may return -EEXIST if the extent is already there, in which case the
+ * state struct is freed.
+ *
+ * The tree lock is not taken internally. This is a utility function and
+ * probably isn't what you want to call (see set/clear_extent_bit).
+ */
+static int insert_state(struct extent_map_tree *tree,
+ struct extent_state *state, u64 start, u64 end,
+ int bits)
+{
+ struct rb_node *node;
+
+ if (end < start) {
+ printk("end < start %Lu %Lu\n", end, start);
+ WARN_ON(1);
+ }
+ state->state |= bits;
+ state->start = start;
+ state->end = end;
+ if ((end & 4095) == 0) {
+ printk("insert state %Lu %Lu strange end\n", start, end);
+ WARN_ON(1);
+ }
+ node = tree_insert(&tree->state, end, &state->rb_node);
+ if (node) {
+ struct extent_state *found;
+ found = rb_entry(node, struct extent_state, rb_node);
+printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
+ free_extent_state(state);
+ return -EEXIST;
+ }
+ merge_state(tree, state);
+ return 0;
+}
+
+/*
+ * split a given extent state struct in two, inserting the preallocated
+ * struct 'prealloc' as the newly created second half. 'split' indicates an
+ * offset inside 'orig' where it should be split.
+ *
+ * Before calling,
+ * the tree has 'orig' at [orig->start, orig->end]. After calling, there
+ * are two extent state structs in the tree:
+ * prealloc: [orig->start, split - 1]
+ * orig: [ split, orig->end ]
+ *
+ * The tree locks are not taken by this function. They need to be held
+ * by the caller.
+ */
+static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
+ struct extent_state *prealloc, u64 split)
+{
+ struct rb_node *node;
+ prealloc->start = orig->start;
+ prealloc->end = split - 1;
+ prealloc->state = orig->state;
+ orig->start = split;
+ if ((prealloc->end & 4095) == 0) {
+ printk("insert state %Lu %Lu strange end\n", prealloc->start,
+ prealloc->end);
+ WARN_ON(1);
+ }
+ node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+ if (node) {
+ struct extent_state *found;
+ found = rb_entry(node, struct extent_state, rb_node);
+printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
+ free_extent_state(prealloc);
+ return -EEXIST;
+ }
+ return 0;
+}
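
A worked example of split_state() above (illustration only, not part of the
patch):

	/*
	 * With orig covering [0, 8191] and split == 4096, the tree afterwards
	 * holds prealloc as [0, 4095] and orig as [4096, 8191], both carrying
	 * orig's original state bits.
	 */
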
+
+/*
+ * utility function to clear some bits in an extent state struct.
+ * it will optionally wake up anyone waiting on this state (wake == 1), or
+ * forcibly remove the state from the tree (delete == 1).
+ *
+ * If no bits are set on the state struct after clearing things, the
+ * struct is freed and removed from the tree
+ */
+static int clear_state_bit(struct extent_map_tree *tree,
+ struct extent_state *state, int bits, int wake,
+ int delete)
+{
+ int ret = state->state & bits;
+ state->state &= ~bits;
+ if (wake)
+ wake_up(&state->wq);
+ if (delete || state->state == 0) {
+ if (state->in_tree) {
+ rb_erase(&state->rb_node, &tree->state);
+ state->in_tree = 0;
+ free_extent_state(state);
+ } else {
+ WARN_ON(1);
+ }
+ } else {
+ merge_state(tree, state);
+ }
+ return ret;
+}
+
+/*
+ * clear some bits on a range in the tree. This may require splitting
+ * or inserting elements in the tree, so the gfp mask is used to
+ * indicate which allocations or sleeping are allowed.
+ *
+ * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
+ * the given range from the tree regardless of state (i.e. for truncate).
+ *
+ * the range [start, end] is inclusive.
+ *
+ * This takes the tree lock, and returns < 0 on error, > 0 if any of the
+ * bits were already set, or zero if none of the bits were already set.
+ */
+int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
+ int bits, int wake, int delete, gfp_t mask)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct rb_node *node;
+ int err;
+ int set = 0;
+
+again:
+ if (!prealloc && (mask & __GFP_WAIT)) {
+ prealloc = alloc_extent_state(mask);
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ write_lock_irq(&tree->lock);
+ /*
+ * this search will find the extents that end after
+ * our range starts
+ */
+ node = tree_search(&tree->state, start);
+ if (!node)
+ goto out;
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->start > end)
+ goto out;
+ WARN_ON(state->end < start);
+
+ /*
+ * | ---- desired range ---- |
+ * | state | or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip
+ * bits on second half.
+ *
+ * If the extent we found extends past our range, we
+ * just split and search again. It'll get split again
+ * the next time though.
+ *
+ * If the extent we found is inside our range, we clear
+ * the desired bit on it.
+ */
+
+ if (state->start < start) {
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits,
+ wake, delete);
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and clear the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ if (wake)
+ wake_up(&state->wq);
+ set |= clear_state_bit(tree, prealloc, bits,
+ wake, delete);
+ prealloc = NULL;
+ goto out;
+ }
+
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits, wake, delete);
+ goto search_again;
+
+out:
+ write_unlock_irq(&tree->lock);
+ if (prealloc)
+ free_extent_state(prealloc);
+
+ return set;
+
+search_again:
+ if (start >= end)
+ goto out;
+ write_unlock_irq(&tree->lock);
+ if (mask & __GFP_WAIT)
+ cond_resched();
+ goto again;
+}
+EXPORT_SYMBOL(clear_extent_bit);
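
A short sketch, not part of the patch, contrasting the wake and delete
arguments to clear_extent_bit() described above; the bit choice and range are
illustrative:

	static void clear_bit_sketch(struct extent_map_tree *tree,
				     u64 start, u64 end)
	{
		/* clear only the dirty bit; don't wake waiters, and keep
		 * records that still carry other bits */
		clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, GFP_NOFS);

		/* remove the range's state entirely, waking anyone sleeping
		 * on it (the truncate-style use mentioned in the comment
		 * above) */
		clear_extent_bit(tree, start, end, EXTENT_DIRTY, 1, 1, GFP_NOFS);
	}
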
+
+static int wait_on_state(struct extent_map_tree *tree,
+ struct extent_state *state)
+{
+ DEFINE_WAIT(wait);
+ prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ read_unlock_irq(&tree->lock);
+ schedule();
+ read_lock_irq(&tree->lock);
+ finish_wait(&state->wq, &wait);
+ return 0;
+}
+
+/*
+ * waits for one or more bits to clear on a range in the state tree.
+ * The range [start, end] is inclusive.
+ * The tree lock is taken by this function
+ */
+int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
+{
+ struct extent_state *state;
+ struct rb_node *node;
+
+ read_lock_irq(&tree->lock);
+again:
+ while (1) {
+ /*
+ * this search will find all the extents that end after
+ * our range starts
+ */
+ node = tree_search(&tree->state, start);
+ if (!node)
+ break;
+
+ state = rb_entry(node, struct extent_state, rb_node);
+
+ if (state->start > end)
+ goto out;
+
+ if (state->state & bits) {
+ start = state->start;
+ atomic_inc(&state->refs);
+ wait_on_state(tree, state);
+ free_extent_state(state);
+ goto again;
+ }
+ start = state->end + 1;
+
+ if (start > end)
+ break;
+
+ if (need_resched()) {
+ read_unlock_irq(&tree->lock);
+ cond_resched();
+ read_lock_irq(&tree->lock);
+ }
+ }
+out:
+ read_unlock_irq(&tree->lock);
+ return 0;
+}
+EXPORT_SYMBOL(wait_extent_bit);
+
+/*
+ * set some bits on a range in the tree. This may require allocations
+ * or sleeping, so the gfp mask is used to indicate what is allowed.
+ *
+ * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
+ * range already has the desired bits set. The start of the existing
+ * range is returned in failed_start in this case.
+ *
+ * [start, end] is inclusive
+ * This takes the tree lock.
+ */
+int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
+ int exclusive, u64 *failed_start, gfp_t mask)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct rb_node *node;
+ int err = 0;
+ int set;
+ u64 last_start;
+ u64 last_end;
+again:
+ if (!prealloc && (mask & __GFP_WAIT)) {
+ prealloc = alloc_extent_state(mask);
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ write_lock_irq(&tree->lock);
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(&tree->state, start);
+ if (!node) {
+ err = insert_state(tree, prealloc, start, end, bits);
+ prealloc = NULL;
+ BUG_ON(err == -EEXIST);
+ goto out;
+ }
+
+ state = rb_entry(node, struct extent_state, rb_node);
+ last_start = state->start;
+ last_end = state->end;
+
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ *
+ * Just lock what we found and keep going
+ */
+ if (state->start == start && state->end <= end) {
+ set = state->state & bits;
+ if (set && exclusive) {
+ *failed_start = state->start;
+ err = -EEXIST;
+ goto out;
+ }
+ state->state |= bits;
+ start = state->end + 1;
+ merge_state(tree, state);
+ goto search_again;
+ }
+
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip bits on
+ * second half.
+ *
+ * If the extent we found extends past our
+ * range, we just split and search again. It'll get split
+ * again the next time though.
+ *
+ * If the extent we found is inside our range, we set the
+ * desired bit on it.
+ */
+ if (state->start < start) {
+ set = state->state & bits;
+ if (exclusive && set) {
+ *failed_start = start;
+ err = -EEXIST;
+ goto out;
+ }
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ state->state |= bits;
+ start = state->end + 1;
+ merge_state(tree, state);
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and set the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ set = state->state & bits;
+ if (exclusive && set) {
+ *failed_start = start;
+ err = -EEXIST;
+ goto out;
+ }
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ prealloc->state |= bits;
+ merge_state(tree, prealloc);
+ prealloc = NULL;
+ goto out;
+ }
+
+ /*
+ * | ---- desired range ---- |
+ * | state | or | state |
+ *
+ * There's a hole, we need to insert something in it and
+ * ignore the extent we found.
+ */
+ if (state->start > start) {
+ u64 this_end;
+ if (end < last_start)
+ this_end = end;
+ else
+ this_end = last_start -1;
+ err = insert_state(tree, prealloc, start, this_end,
+ bits);
+ prealloc = NULL;
+ BUG_ON(err == -EEXIST);
+ if (err)
+ goto out;
+ start = this_end + 1;
+ goto search_again;
+ }
+ goto search_again;
+
+out:
+ write_unlock_irq(&tree->lock);
+ if (prealloc)
+ free_extent_state(prealloc);
+
+ return err;
+
+search_again:
+ if (start > end)
+ goto out;
+ write_unlock_irq(&tree->lock);
+ if (mask & __GFP_WAIT)
+ cond_resched();
+ goto again;
+}
+EXPORT_SYMBOL(set_extent_bit);
+
+/* wrappers around set/clear extent bit */
+int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
+ mask);
+}
+EXPORT_SYMBOL(set_extent_dirty);
+
+int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_dirty);
+
+int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
+ mask);
+}
+EXPORT_SYMBOL(set_extent_new);
+
+int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_new);
+
+int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+ mask);
+}
+EXPORT_SYMBOL(set_extent_uptodate);
+
+int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_uptodate);
+
+int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
+ 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_writeback);
+
+int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_writeback);
+
+int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
+{
+ return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
+}
+EXPORT_SYMBOL(wait_on_extent_writeback);
+
+/*
+ * locks a range in ascending order, waiting for any locked regions
+ * it hits on the way. [start,end] are inclusive, and this will sleep.
+ */
+int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+ int err;
+ u64 failed_start;
+ while (1) {
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+ &failed_start, mask);
+ if (err == -EEXIST && (mask & __GFP_WAIT)) {
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+ start = failed_start;
+ } else {
+ break;
+ }
+ WARN_ON(start > end);
+ }
+ return err;
+}
+EXPORT_SYMBOL(lock_extent);
+
+int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+}
+EXPORT_SYMBOL(unlock_extent);
+
+/*
+ * helper function to set pages and extents in the tree dirty
+ */
+int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(tree->mapping, index);
+ BUG_ON(!page);
+ __set_page_dirty_nobuffers(page);
+ page_cache_release(page);
+ index++;
+ }
+ set_extent_dirty(tree, start, end, GFP_NOFS);
+ return 0;
+}
+EXPORT_SYMBOL(set_range_dirty);
+
+/*
+ * helper function to set both pages and extents in the tree writeback
+ */
+int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(tree->mapping, index);
+ BUG_ON(!page);
+ set_page_writeback(page);
+ page_cache_release(page);
+ index++;
+ }
+ set_extent_writeback(tree, start, end, GFP_NOFS);
+ return 0;
+}
+EXPORT_SYMBOL(set_range_writeback);
+
+/*
+ * helper function to lock both pages and extents in the tree.
+ * pages must be locked first.
+ */
+int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+ int err;
+
+ while (index <= end_index) {
+ page = grab_cache_page(tree->mapping, index);
+ if (!page) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto failed;
+ }
+ index++;
+ }
+ lock_extent(tree, start, end, GFP_NOFS);
+ return 0;
+
+failed:
+ /*
+ * we failed above in getting the page at 'index', so we undo here
+ * up to but not including the page at 'index'
+ */
+ end_index = index;
+ index = start >> PAGE_CACHE_SHIFT;
+ while (index < end_index) {
+ page = find_get_page(tree->mapping, index);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+ return err;
+}
+EXPORT_SYMBOL(lock_range);
+
+/*
+ * helper function to unlock both pages and extents in the tree.
+ */
+int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(tree->mapping, index);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+ unlock_extent(tree, start, end, GFP_NOFS);
+ return 0;
+}
+EXPORT_SYMBOL(unlock_range);
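
A brief sketch, not part of the patch, of pairing lock_range() and
unlock_range(); per the comment above lock_range(), the pages are locked
before the extent range. The caller convention shown is an assumption:

	static int locked_region_sketch(struct extent_map_tree *tree,
					u64 start, u64 end)
	{
		int err;

		/* locks each page cache page in the range, then the extent range */
		err = lock_range(tree, start, end);
		if (err)
			return err;

		/* ... operate on the locked pages and range here ... */

		/* unlocks the pages and clears EXTENT_LOCKED on the range */
		unlock_range(tree, start, end);
		return 0;
	}
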
+
+/*
+ * searches a range in the state tree for a given mask.
+ * If 'filled' == 1, this returns 1 only if every extent in the range
+ * has the bits set. Otherwise, 1 is returned if any bit in the
+ * range is found set.
+ */
+static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
+ int bits, int filled)
+{
+ struct extent_state *state = NULL;
+ struct rb_node *node;
+ int bitset = 0;
+
+ read_lock_irq(&tree->lock);
+ node = tree_search(&tree->state, start);
+ while (node && start <= end) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->start > end)
+ break;
+
+ if (filled && state->start > start) {
+ bitset = 0;
+ break;
+ }
+ if (state->state & bits) {
+ bitset = 1;
+ if (!filled)
+ break;
+ } else if (filled) {
+ bitset = 0;
+ break;
+ }
+ start = state->end + 1;
+ if (start > end)
+ break;
+ node = rb_next(node);
+ }
+ read_unlock_irq(&tree->lock);
+ return bitset;
+}
+
+/*
+ * helper function to set a given page up to date if all the
+ * extents in the tree for that page are up to date
+ */
+static int check_page_uptodate(struct extent_map_tree *tree,
+ struct page *page)
+{
+ u64 start = page->index << PAGE_CACHE_SHIFT;
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+