aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c3
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/delayed-ref.c585
-rw-r--r--fs/btrfs/delayed-ref.h182
-rw-r--r--fs/btrfs/disk-io.c29
-rw-r--r--fs/btrfs/extent-tree.c1496
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/transaction.c54
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-defrag.c2
11 files changed, 1234 insertions, 1140 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b..9adf5e4f7e9 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
- compression.o
+ compression.o delayed-ref.o
else
# Normal Makefile
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 87c90387283..bebc9fd1666 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -922,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
spin_unlock(&root->node_lock);
ret = btrfs_update_extent_ref(trans, root, child->start,
+ child->len,
mid->start, child->start,
root->root_key.objectid,
trans->transid, level - 1);
@@ -2075,7 +2076,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
spin_unlock(&root->node_lock);
ret = btrfs_update_extent_ref(trans, root, lower->start,
- lower->start, c->start,
+ lower->len, lower->start, c->start,
root->root_key.objectid,
trans->transid, level - 1);
BUG_ON(ret);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3a37ba7a8d6..ced5fd85dc3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -688,8 +688,6 @@ struct btrfs_fs_info {
struct rb_root block_group_cache_tree;
struct extent_io_tree pinned_extents;
- struct extent_io_tree pending_del;
- struct extent_io_tree extent_ins;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
@@ -717,7 +715,6 @@ struct btrfs_fs_info {
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
- struct mutex extent_ins_mutex;
struct mutex pinned_mutex;
struct mutex chunk_mutex;
struct mutex drop_mutex;
@@ -1704,18 +1701,15 @@ static inline struct dentry *fdentry(struct file *file)
}
/* extent-tree.c */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u32 *refs);
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, u64 bytenr);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
@@ -1777,7 +1771,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid);
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
+ struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 orig_parent, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
new file mode 100644
index 00000000000..874565a1f63
--- /dev/null
+++ b/fs/btrfs/delayed-ref.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright (C) 2009 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/sort.h>
+#include <linux/ftrace.h>
+#include "ctree.h"
+#include "delayed-ref.h"
+#include "transaction.h"
+
+/*
+ * delayed back reference update tracking. For subvolume trees
+ * we queue up extent allocations and backref maintenance for
+ * delayed processing. This avoids deep call chains where we
+ * add extents in the middle of btrfs_search_slot, and it allows
+ * us to buffer up frequently modified backrefs in an rb tree instead
+ * of hammering updates on the extent allocation tree.
+ *
+ * Right now this code is only used for reference counted trees, but
+ * the long term goal is to get rid of the similar code for delayed
+ * extent tree modifications.
+ */
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent
+ * and by the byte number of the parent block.
+ */
+static int comp_entry(struct btrfs_delayed_ref_node *ref,
+ u64 bytenr, u64 parent)
+{
+ if (bytenr < ref->bytenr)
+ return -1;
+ if (bytenr > ref->bytenr)
+ return 1;
+ if (parent < ref->parent)
+ return -1;
+ if (parent > ref->parent)
+ return 1;
+ return 0;
+}
+
+/*
+ * insert a new ref into the rbtree. This returns any existing refs
+ * for the same (bytenr,parent) tuple, or NULL if the new node was properly
+ * inserted.
+ */
+static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
+ u64 bytenr, u64 parent,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_delayed_ref_node *entry;
+ int cmp;
+
+ while (*p) {
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
+ rb_node);
+
+ cmp = comp_entry(entry, bytenr, parent);
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else if (cmp > 0)
+ p = &(*p)->rb_right;
+ else
+ return entry;
+ }
+
+ entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ rb_link_node(node, parent_node, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+/*
+ * find an entry based on (bytenr,parent). This returns the delayed
+ * ref if it was able to find one, or NULL if nothing was in that spot
+ */
+static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
+ u64 bytenr, u64 parent)
+{
+ struct rb_node *n = root->rb_node;
+ struct btrfs_delayed_ref_node *entry;
+ int cmp;
+
+ while (n) {
+ entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+ WARN_ON(!entry->in_tree);
+
+ cmp = comp_entry(entry, bytenr, parent);
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return entry;
+ }
+ return NULL;
+}
+
+/*
+ * Locking on delayed refs is done by taking a lock on the head node,
+ * which has the (impossible) parent id of (u64)-1. Once a lock is held
+ * on the head node, you're allowed (and required) to process all the
+ * delayed refs for a given byte number in the tree.
+ *
+ * This will walk forward in the rbtree until it finds a head node it
+ * is able to lock. It might not lock the delayed ref you asked for,
+ * and so it will return the one it did lock in next_ret and return 0.
+ *
+ * If no locks are taken, next_ret is set to null and 1 is returned. This
+ * means there are no more unlocked head nodes in the rbtree.
+ */
+int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *ref,
+ struct btrfs_delayed_ref_head **next_ret)
+{
+ struct rb_node *node;
+ struct btrfs_delayed_ref_head *head;
+ int ret = 0;
+
+ while (1) {
+ if (btrfs_delayed_ref_is_head(ref)) {
+ head = btrfs_delayed_node_to_head(ref);
+ if (mutex_trylock(&head->mutex)) {
+ *next_ret = head;
+ ret = 0;
+ break;
+ }
+ }
+ node = rb_next(&ref->rb_node);
+ if (!node) {
+ ret = 1;
+ *next_ret = NULL;
+ break;
+ }
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ }
+ return ret;
+}
+
+/*
+ * This checks to see if there are any delayed refs in the
+ * btree for a given bytenr. It returns one if it finds any
+ * and zero otherwise.
+ *
+ * If it only finds a head node, it returns 0.
+ *
+ * The idea is to use this when deciding if you can safely delete an
+ * extent from the extent allocation tree. There may be a pending
+ * ref in the rbtree that adds or removes references, so as long as this
+ * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
+ * allocation tree.
+ */
+int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
+{
+ struct btrfs_delayed_ref_node *ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct rb_node *prev_node;
+ int ret = 0;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+
+ ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+ if (ref) {
+ prev_node = rb_prev(&ref->rb_node);
+ if (!prev_node)
+ goto out;
+ ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (ref->bytenr == bytenr)
+ ret = 1;
+ }
+out:
+ spin_unlock(&delayed_refs->lock);
+ return ret;
+}
+
+/*
+ * helper function to lookup reference count
+ *
+ * the head node for delayed ref is used to store the sum of all the
+ * reference count modifications queued up in the rbtree. This way you
+ * can check to see what the reference count would be if all of the
+ * delayed refs are processed.
+ */
+int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u32 *refs)
+{
+ struct btrfs_delayed_ref_node *ref;
+ struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+ u32 num_refs;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ delayed_refs = &trans->transaction->delayed_refs;
+again:
+ ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+ &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ if (ret == 0) {
+ leaf = path->nodes[0];
+ ei = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item);
+ num_refs = btrfs_extent_refs(leaf, ei);
+ } else {
+ num_refs = 0;
+ ret = 0;
+ }
+
+ spin_lock(&delayed_refs->lock);
+ ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+ if (ref) {
+ head = btrfs_delayed_node_to_head(ref);
+ if (mutex_trylock(&head->mutex)) {
+ num_refs += ref->ref_mod;
+ mutex_unlock(&head->mutex);
+ *refs = num_refs;
+ goto out;
+ }
+
+ atomic_inc(&ref->refs);
+ spin_unlock(&delayed_refs->lock);
+
+ btrfs_release_path(root->fs_info->extent_root, path);
+
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(ref);
+ goto again;
+ } else {
+ *refs = num_refs;
+ }
+out:
+ spin_unlock(&delayed_refs->lock);
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * helper function to update an extent delayed ref in the
+ * rbtree. existing and update must both have the same
+ * bytenr and parent
+ *
+ * This may free existing if the update cancels out whatever
+ * operation it was doing.
+ */
+static noinline void
+update_existing_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *existing,
+ struct btrfs_delayed_ref_node *update)
+{
+ struct btrfs_delayed_ref *existing_ref;
+ struct btrfs_delayed_ref *ref;
+
+ existing_ref = btrfs_delayed_node_to_ref(existing);
+ ref = btrfs_delayed_node_to_ref(update);
+
+ if (ref->pin)
+ existing_ref->pin = 1;
+
+ if (ref->action != existing_ref->action) {
+ /*
+ * this is effectively undoing either an add or a
+ * drop. We decrement the ref_mod, and if it goes
+ * down to zero we just delete the entry without
+ * every changing the extent allocation tree.
+ */
+ existing->ref_mod--;
+ if (existing->ref_mod == 0) {
+ rb_erase(&existing->rb_node,
+ &delayed_refs->root);
+ existing->in_tree = 0;
+ btrfs_put_delayed_ref(existing);
+ delayed_refs->num_entries--;
+ if (trans->delayed_ref_updates)
+ trans->delayed_ref_updates--;
+ }
+ } else {
+ if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
+ /* if we're adding refs, make sure all the
+ * details match up. The extent could
+ * have been totally freed and reallocated
+ * by a different owner before the delayed
+ * ref entries were removed.
+ */
+ existing_ref->owner_objectid = ref->owner_objectid;
+ existing_ref->generation = ref->generation;
+ existing_ref->root = ref->root;
+ existing->num_bytes = update->num_bytes;
+ }
+ /*
+ * the action on the existing ref matches
+ * the action on the ref we're trying to add.
+ * Bump the ref_mod by one so the backref that
+ * is eventually added/removed has the correct
+ * reference count
+ */
+ existing->ref_mod += update->ref_mod;
+ }
+}
+
+/*
+ * helper function to update the accounting in the head ref
+ * existing and update must have the same bytenr
+ */
+static noinline void
+update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+ struct btrfs_delayed_ref_node *update)
+{
+ struct btrfs_delayed_ref_head *existing_ref;
+ struct btrfs_delayed_ref_head *ref;
+
+ existing_ref = btrfs_delayed_node_to_head(existing);
+ ref = btrfs_delayed_node_to_head(update);
+
+ if (ref->must_insert_reserved) {
+ /* if the extent was freed and then
+ * reallocated before the delayed ref
+ * entries were processed, we can end up
+ * with an existing head ref without
+ * the must_insert_reserved flag set.
+ * Set it again here
+ */
+ existing_ref->must_insert_reserved = ref->must_insert_reserved;
+
+ /*
+ * update the num_bytes so we make sure the accounting
+ * is done correctly
+ */
+ existing->num_bytes = update->num_bytes;
+
+ }
+
+ /*
+ * update the reference mod on the head to reflect this new operation
+ */
+ existing->ref_mod += update->ref_mod;
+}
+
+/*
+ * helper function to actually insert a delayed ref into the rbtree.
+ * this does all the dirty work in terms of maintaining the correct
+ * overall modification count in the head node and properly dealing
+ * with updating existing nodes as new modifications are queued.
+ */
+static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *ref,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
+ u64 ref_generation, u64 owner_objectid, int action,
+ int pin)
+{
+ struct btrfs_delayed_ref_node *existing;
+ struct btrfs_delayed_ref *full_ref;
+ struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int count_mod = 1;
+ int must_insert_reserved = 0;
+
+ /*
+ * the head node stores the sum of all the mods, so dropping a ref
+ * should drop the sum in the head node by one.
+ */
+ if (parent == (u64)-1 && action == BTRFS_DROP_DELAYED_REF)
+ count_mod = -1;
+
+ /*
+ * BTRFS_ADD_DELAYED_EXTENT means that we need to update
+ * the reserved accounting when the extent is finally added, or
+ * if a later modification deletes the delayed ref without ever
+ * inserting the extent into the extent allocation tree.
+ * ref->must_insert_reserved is the flag used to record
+ * that accounting mods are required.
+ *
+ * Once we record must_insert_reserved, switch the action to
+ * BTRFS_ADD_DELAYED_REF because other special casing is not required.
+ */
+ if (action == BTRFS_ADD_DELAYED_EXTENT) {
+ must_insert_reserved = 1;
+ action = BTRFS_ADD_DELAYED_REF;
+ } else {
+ must_insert_reserved = 0;
+ }
+
+
+ delayed_refs = &trans->transaction->delayed_refs;
+
+ /* first set the basic ref node struct up */
+ atomic_set(&ref->refs, 1);
+ ref->bytenr = bytenr;
+ ref->parent = parent;
+ ref->ref_mod = count_mod;
+ ref->in_tree = 1;
+ ref->num_bytes = num_bytes;
+
+ if (btrfs_delayed_ref_is_head(ref)) {
+ head_ref = btrfs_delayed_node_to_head(ref);
+ head_ref->must_insert_reserved = must_insert_reserved;
+ mutex_init(&head_ref->mutex);
+ } else {
+ full_ref = btrfs_delayed_node_to_ref(ref);
+ full_ref->root = ref_root;
+ full_ref->generation = ref_generation;
+ full_ref->owner_objectid = owner_objectid;
+ full_ref->pin = pin;
+ full_ref->action = action;
+ }
+
+ existing = tree_insert(&delayed_refs->root, bytenr,
+ parent, &ref->rb_node);
+
+ if (existing) {
+ if (btrfs_delayed_ref_is_head(ref))
+ update_existing_head_ref(existing, ref);
+ else
+ update_existing_ref(trans, delayed_refs, existing, ref);
+
+ /*
+ * we've updated the existing ref, free the newly
+ * allocated ref
+ */
+ kfree(ref);
+ } else {
+ delayed_refs->num_entries++;
+ trans->delayed_ref_updates++;
+ }
+ return 0;
+}
+
+/*
+ * add a delayed ref to the tree. This does all of the accounting required
+ * to make sure the delayed ref is eventually processed before this
+ * transaction commits.
+ */
+int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
+ u64 ref_generation, u64 owner_objectid, int action,
+ int pin)
+{
+ struct btrfs_delayed_ref *ref;
+ struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int ret;
+
+ ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ if (!ref)
+ return -ENOMEM;
+
+ /*
+ * the parent = 0 case comes from cases where we don't actually
+ * know the parent yet. It will get updated later via a add/drop
+ * pair.
+ */
+ if (parent == 0)
+ parent = bytenr;
+
+ head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+ if (!head_ref) {
+ kfree(ref);
+ return -ENOMEM;
+ }
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+
+ /*
+ * insert both the head node and the new ref without dropping
+ * the spin lock
+ */
+ ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
+ (u64)-1, 0, 0, 0, action, pin);
+ BUG_ON(ret);
+
+ ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
+ parent, ref_root, ref_generation,
+ owner_objectid, action, pin);
+ BUG_ON(ret);
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+}
+
+/*
+ * add a delayed ref to the tree. This does all of the accounting required
+ * to make sure the delayed ref is eventually processed before this
+ * transaction commits.
+ *
+ * The main point of this call is to add and remove a backreference in a single
+ * shot, taking the lock only once, and only searching for the head node once.
+ *
+ * It is the same as doing a ref add and delete in two separate calls.
+ */
+int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 orig_parent,
+ u64 parent, u64 orig_ref_root, u64 ref_root,
+ u64 orig_ref_generation, u64 ref_generation,
+ u64 owner_objectid, int pin)
+{
+ struct btrfs_delayed_ref *ref;
+ struct btrfs_delayed_ref *old_ref;
+ struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int ret;
+
+ ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ if (!ref)
+ return -ENOMEM;
+
+ old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
+ if (!old_ref) {
+ kfree(ref);
+ return -ENOMEM;
+ }
+
+ /*
+ * the parent = 0 case comes from cases where we don't actually
+ * know the parent yet. It will get updated later via a add/drop
+ * pair.
+ */
+ if (parent == 0)
+ parent = bytenr;
+ if (orig_parent == 0)
+ orig_parent = bytenr;
+
+ head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+ if (!head_ref) {
+ kfree(ref);
+ kfree(old_ref);
+ return -ENOMEM;
+ }
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+
+ /*
+ * insert both the head node and the new ref without dropping
+ * the spin lock
+ */
+ ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
+ (u64)-1, 0, 0, 0,
+ BTRFS_ADD_DELAYED_REF, 0);
+ BUG_ON(ret);
+
+ ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
+ parent, ref_root, ref_generation,
+ owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
+ BUG_ON(ret);
+
+ ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
+ orig_parent, orig_ref_root,
+ orig_ref_generation, owner_objectid,
+ BTRFS_DROP_DELAYED_REF, pin);
+ BUG_ON(ret);
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
new file mode 100644
index 00000000000..37919e5c007
--- /dev/null
+++ b/fs/btrfs/delayed-ref.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifndef __DELAYED_REF__
+#define __DELAYED_REF__
+
+/* these are the possible values of struct btrfs_delayed_ref->action */
+#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
+#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
+#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
+
+struct btrfs_delayed_ref_node {
+ struct rb_node rb_node;
+
+ /* the starting bytenr of the extent */
+ u64 bytenr;
+
+ /* the parent our backref will point to */
+ u64 parent;
+
+ /* the size of the extent */
+ u64 num_bytes;
+
+ /* ref count on this data structure */
+ atomic_t refs;
+
+ /*
+ * how many refs is this entry adding or deleting. For
+ * head refs, this may be a negative number because it is keeping
+ * track of the total mods done to the reference count.
+ * For individual refs, this will always be a positive number
+ *
+ * It may be more than one, since it is possible for a single
+ * parent to have more than one ref on an extent
+ */
+ int ref_mod;
+
+ /* is this node still in the rbtree? */
+ unsigned int in_tree:1;
+};
+
+/*
+ * the head refs are used to hold a lock on a given extent, which allows us
+ * to make sure that only one process is running the delayed refs
+ * at a time for a single extent. They also store the sum of all the
+ * reference count modifications we've queued up.
+ */
+struct btrfs_delayed_ref_head {
+ struct btrfs_delayed_ref_node node;
+
+ /*
+ * the mutex is held while running the refs, and it is also
+ * held when checking the sum of reference modifications.
+ */
+ struct mutex mutex;
+
+ /*
+ * when a new extent is allocated, it is just reserved in memory
+ * The actual extent isn't inserted into the extent allocation tree
+ * until the delayed ref is processed. must_insert_reserved is
+ * used to flag a delayed ref so the accounting can be updated
+ * when a full insert is done.
+ *
+ * It is possible the extent will be freed before it is ever
+ * inserted into the extent allocation tree. In this case
+ * we need to update the in ram accounting to properly reflect
+ * the free has happened.
+ */
+ unsigned int must_insert_reserved:1;
+};
+
+struct btrfs_delayed_ref {
+ struct btrfs_delayed_ref_node node;
+
+ /* the root objectid our ref will point to */
+ u64 root;
+
+ /* the generation for the backref */
+ u64 generation;
+
+ /* owner_objectid of the backref */
+ u64 owner_objectid;
+
+ /* operation done by this entry in the rbtree */
+ u8 action;
+
+ /* if pin == 1, when the extent is freed it will be pinned until
+ * transaction commit
+ */
+ unsigned int pin:1;
+};
+
+struct btrfs_delayed_ref_root {
+ struct rb_root root;
+
+ /* this spin lock protects the rbtree and the entries inside */
+ spinlock_t lock;
+
+ /* how many delayed ref updates we've queued, used by the
+ * throttling code
+ */
+ unsigned long num_entries;
+
+ /*
+ * set when the tree is flushing before a transaction commit,
+ * used by the throttling code to decide if new updates need
+ * to be run right away
+ */
+ int flushing;
+};
+
+static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
+{
+ WARN_ON(atomic_read(&ref->refs) == 0);
+ if (atomic_dec_and_test(&ref->refs)) {
+ WARN_ON(ref->in_tree);
+ kfree(ref);
+ }
+}
+
+int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
+ u64 ref_generation, u64 owner_objectid, int action,
+ int pin);
+
+struct btrfs_delayed_ref *
+btrfs_find_delayed_ref(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 parent);
+int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
+int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *ref,
+ struct btrfs_delayed_ref_head **next_ret);
+int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u32 *refs);
+int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 orig_parent,
+ u64 parent, u64 orig_ref_root, u64 ref_root,
+ u64 orig_ref_generation, u64 ref_generation,
+ u64 owner_objectid, int pin);
+/*
+ * a node might live in a head or a regular ref, this lets you
+ * test for the proper type to use.
+ */
+static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
+{
+ return node->parent == (u64)-1;
+}
+
+/*
+ * helper functions to cast a node into its container
+ */
+static inline struct btrfs_delayed_ref *
+btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
+{
+ WARN_ON(btrfs_delayed_ref_is_head(node));
+ return container_of(node, struct btrfs_delayed_ref, node);
+
+}
+
+static inline struct btrfs_delayed_ref_head *
+btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
+{
+ WARN_ON(!btrfs_delayed_ref_is_head(node));
+ return container_of(node, struct btrfs_delayed_ref_head, node);
+
+}
+#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e..4f43e227a29 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1458,6 +1458,7 @@ static int transaction_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
+ struct btrfs_fs_info *info = root->fs_info;
unsigned long now;
unsigned long delay;
int ret;
@@ -1471,12 +1472,6 @@ static int transaction_kthread(void *arg)
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
- if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
- printk(KERN_INFO "btrfs: total reference cache "
- "size %llu\n",
- root->fs_info->total_ref_cache_size);
- }
-
mutex_lock(&root->fs_info->trans_mutex);
cur = root->fs_info->running_transaction;
if (!cur) {
@@ -1486,13 +1481,30 @@ static int transaction_kthread(void *arg)
now = get_seconds();
if (now < cur->start_time || now - cur->start_time < 30) {
+ unsigned long num_delayed;
+ num_delayed = cur->delayed_refs.num_entries;
mutex_unlock(&root->fs_info->trans_mutex);
delay = HZ * 5;
+
+ /*
+ * we may have been woken up early to start
+ * processing the delayed extent ref updates
+ * If so, run some of them and then loop around again
+ * to see if we need to force a commit
+ */
+ if (num_delayed > 64) {
+ mutex_unlock(&info->transaction_kthread_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_run_delayed_refs(trans, root, 256);
+ btrfs_end_transaction(trans, root);
+ continue;
+ }
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
+
sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1611,10 +1623,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
extent_io_tree_init(&fs_info->pinned_extents,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_io_tree_init(&fs_info->pending_del,
- fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_io_tree_init(&fs_info->extent_ins,
- fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->do_barriers = 1;
INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1629,7 +1637,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->drop_mutex);
- mutex_init(&fs_info->extent_ins_mutex);
mutex_init(&fs_info->pinned_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fefe83ad205..9b5da2b013e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -49,10 +49,13 @@ struct pending_extent_op {
int del;
};
-static int finish_current_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, int all);
-static int del_pending_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, int all);
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, struct btrfs_key *ins,
+ int ref_mod);
+static int update_reserved_extents(struct btrfs_root *root,
+ u64 bytenr, u64 num, int reserve);
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int is_data);
@@ -60,6 +63,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid, int pin,
+ int ref_to_drop);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -554,262 +563,13 @@ out:
return ret;
}
-/*
- * updates all the backrefs that are pending on update_list for the
- * extent_root
- */
-static noinline int update_backrefs(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct btrfs_path *path,
- struct list_head *update_list)
-{
- struct btrfs_key key;
- struct btrfs_extent_ref *ref;
- struct btrfs_fs_info *info = extent_root->fs_info;
- struct pending_extent_op *op;
- struct extent_buffer *leaf;
- int ret = 0;
- struct list_head *cur = update_list->next;
- u64 ref_objectid;
- u64 ref_root = extent_root->root_key.objectid;
-
- op = list_entry(cur, struct pending_extent_op, list);
-
-search:
- key.objectid = op->bytenr;
- key.type = BTRFS_EXTENT_REF_KEY;
- key.offset = op->orig_parent;
-
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
- BUG_ON(ret);
-
- leaf = path->nodes[0];
-
-loop:
- ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
-
- ref_objectid = btrfs_ref_objectid(leaf, ref);
-
- if (btrfs_ref_root(leaf, ref) != ref_root ||
- btrfs_ref_generation(leaf, ref) != op->orig_generation ||
- (ref_objectid != op->level &&
- ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
- printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
- "root %llu, owner %u\n",
- (unsigned long long)op->bytenr,
- (unsigned long long)op->orig_parent,
- (unsigned long long)ref_root, op->level);
- btrfs_print_leaf(extent_root, leaf);
- BUG();
- }
-