Diffstat (limited to 'fs/btrfs/delayed-ref.c')
-rw-r--r--  fs/btrfs/delayed-ref.c | 994
1 file changed, 635 insertions(+), 359 deletions(-)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c096a4..6d16bea94e1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -17,11 +17,16 @@
*/
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
+struct kmem_cache *btrfs_delayed_ref_head_cachep;
+struct kmem_cache *btrfs_delayed_tree_ref_cachep;
+struct kmem_cache *btrfs_delayed_data_ref_cachep;
+struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* delayed back reference update tracking. For subvolume trees
* we queue up extent allocations and backref maintenance for
@@ -29,27 +34,100 @@
* add extents in the middle of btrfs_search_slot, and it allows
* us to buffer up frequently modified backrefs in an rb tree instead
* of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
*/
/*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
+ * compare two delayed tree backrefs with the same bytenr and type
+ */
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
+ struct btrfs_delayed_tree_ref *ref1, int type)
+{
+ if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+ if (ref1->root < ref2->root)
+ return -1;
+ if (ref1->root > ref2->root)
+ return 1;
+ } else {
+ if (ref1->parent < ref2->parent)
+ return -1;
+ if (ref1->parent > ref2->parent)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * compare two delayed data backrefs with the same bytenr and type
+ */
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
+ struct btrfs_delayed_data_ref *ref1)
+{
+ if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
+ if (ref1->root < ref2->root)
+ return -1;
+ if (ref1->root > ref2->root)
+ return 1;
+ if (ref1->objectid < ref2->objectid)
+ return -1;
+ if (ref1->objectid > ref2->objectid)
+ return 1;
+ if (ref1->offset < ref2->offset)
+ return -1;
+ if (ref1->offset > ref2->offset)
+ return 1;
+ } else {
+ if (ref1->parent < ref2->parent)
+ return -1;
+ if (ref1->parent > ref2->parent)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent,
+ * the type of the delayed backref and the content of the delayed backref.
*/
-static int comp_entry(struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 parent)
+static int comp_entry(struct btrfs_delayed_ref_node *ref2,
+ struct btrfs_delayed_ref_node *ref1,
+ bool compare_seq)
{
- if (bytenr < ref->bytenr)
+ if (ref1->bytenr < ref2->bytenr)
return -1;
- if (bytenr > ref->bytenr)
+ if (ref1->bytenr > ref2->bytenr)
return 1;
- if (parent < ref->parent)
+ if (ref1->is_head && ref2->is_head)
+ return 0;
+ if (ref2->is_head)
+ return -1;
+ if (ref1->is_head)
+ return 1;
+ if (ref1->type < ref2->type)
return -1;
- if (parent > ref->parent)
+ if (ref1->type > ref2->type)
return 1;
+ if (ref1->no_quota > ref2->no_quota)
+ return 1;
+ if (ref1->no_quota < ref2->no_quota)
+ return -1;
+ /* merging of sequenced refs is not allowed */
+ if (compare_seq) {
+ if (ref1->seq < ref2->seq)
+ return -1;
+ if (ref1->seq > ref2->seq)
+ return 1;
+ }
+ if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
+ btrfs_delayed_node_to_tree_ref(ref1),
+ ref1->type);
+ } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
+ return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
+ btrfs_delayed_node_to_data_ref(ref1));
+ }
+ BUG();
return 0;
}
@@ -59,20 +137,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
* inserted.
*/
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- u64 bytenr, u64 parent,
struct rb_node *node)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_node *entry;
+ struct btrfs_delayed_ref_node *ins;
int cmp;
+ ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
rb_node);
- cmp = comp_entry(entry, bytenr, parent);
+ cmp = comp_entry(entry, ins, 1);
if (cmp < 0)
p = &(*p)->rb_left;
else if (cmp > 0)
@@ -81,38 +160,77 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
return entry;
}
- entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ rb_link_node(node, parent_node, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+/* insert a new head ref into the head ref rbtree */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_delayed_ref_head *entry;
+ struct btrfs_delayed_ref_head *ins;
+ u64 bytenr;
+
+ ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+ bytenr = ins->node.bytenr;
+ while (*p) {
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+ href_node);
+
+ if (bytenr < entry->node.bytenr)
+ p = &(*p)->rb_left;
+ else if (bytenr > entry->node.bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry;
+ }
+
rb_link_node(node, parent_node, p);
rb_insert_color(node, root);
return NULL;
}
/*
- * find an entry based on (bytenr,parent). This returns the delayed
- * ref if it was able to find one, or NULL if nothing was in that spot
+ * find a head entry based on bytenr. This returns the delayed ref
+ * head if it was able to find one, or NULL if nothing was in that spot.
+ * If return_bigger is given, the next bigger entry is returned if no exact
+ * match is found.
*/
-static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
- u64 bytenr, u64 parent,
- struct btrfs_delayed_ref_node **last)
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+ int return_bigger)
{
- struct rb_node *n = root->rb_node;
- struct btrfs_delayed_ref_node *entry;
- int cmp;
+ struct rb_node *n;
+ struct btrfs_delayed_ref_head *entry;
+ n = root->rb_node;
+ entry = NULL;
while (n) {
- entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
- WARN_ON(!entry->in_tree);
- if (last)
- *last = entry;
+ entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
- cmp = comp_entry(entry, bytenr, parent);
- if (cmp < 0)
+ if (bytenr < entry->node.bytenr)
n = n->rb_left;
- else if (cmp > 0)
+ else if (bytenr > entry->node.bytenr)
n = n->rb_right;
else
return entry;
}
+ if (entry && return_bigger) {
+ if (bytenr > entry->node.bytenr) {
+ n = rb_next(&entry->href_node);
+ if (!n)
+ n = rb_first(root);
+ entry = rb_entry(n, struct btrfs_delayed_ref_head,
+ href_node);
+ return entry;
+ }
+ return entry;
+ }
return NULL;
}
@@ -140,188 +258,188 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 start)
+static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
+ struct btrfs_delayed_ref_node *ref)
{
- int count = 0;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *head;
-
- delayed_refs = &trans->transaction->delayed_refs;
- if (start == 0) {
- node = rb_first(&delayed_refs->root);
+ if (btrfs_delayed_ref_is_head(ref)) {
+ head = btrfs_delayed_node_to_head(ref);
+ rb_erase(&head->href_node, &delayed_refs->href_root);
} else {
- ref = NULL;
- tree_search(&delayed_refs->root, start, (u64)-1, &ref);
- if (ref) {
- struct btrfs_delayed_ref_node *tmp;
-
- node = rb_prev(&ref->rb_node);
- while (node) {
- tmp = rb_entry(node,
- struct btrfs_delayed_ref_node,
- rb_node);
- if (tmp->bytenr < start)
- break;
- ref = tmp;
- node = rb_prev(&ref->rb_node);
- }
- node = &ref->rb_node;
- } else
- node = rb_first(&delayed_refs->root);
+ assert_spin_locked(&head->lock);
+ rb_erase(&ref->rb_node, &head->ref_root);
}
-again:
- while (node && count < 32) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- head = btrfs_delayed_node_to_head(ref);
- if (list_empty(&head->cluster)) {
- list_add_tail(&head->cluster, cluster);
- delayed_refs->run_delayed_start =
- head->node.bytenr;
- count++;
-
- WARN_ON(delayed_refs->num_heads_ready == 0);
- delayed_refs->num_heads_ready--;
- } else if (count) {
- /* the goal of the clustering is to find extents
- * that are likely to end up in the same extent
- * leaf on disk. So, we don't want them spread
- * all over the tree. Stop now if we've hit
- * a head that was already in use
- */
- break;
+ ref->in_tree = 0;
+ btrfs_put_delayed_ref(ref);
+ atomic_dec(&delayed_refs->num_entries);
+ if (trans->delayed_ref_updates)
+ trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
+ struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+ struct rb_node *node;
+ int mod = 0;
+ int done = 0;
+
+ node = rb_next(&ref->rb_node);
+ while (!done && node) {
+ struct btrfs_delayed_ref_node *next;
+
+ next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_next(node);
+ if (seq && next->seq >= seq)
+ break;
+ if (comp_entry(ref, next, 0))
+ continue;
+
+ if (ref->action == next->action) {
+ mod = next->ref_mod;
+ } else {
+ if (ref->ref_mod < next->ref_mod) {
+ struct btrfs_delayed_ref_node *tmp;
+
+ tmp = ref;
+ ref = next;
+ next = tmp;
+ done = 1;
}
+ mod = -next->ref_mod;
+ }
+
+ drop_delayed_ref(trans, delayed_refs, head, next);
+ ref->ref_mod += mod;
+ if (ref->ref_mod == 0) {
+ drop_delayed_ref(trans, delayed_refs, head, ref);
+ done = 1;
+ } else {
+ /*
+ * You can't have multiples of the same ref on a tree
+ * block.
+ */
+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
}
- node = rb_next(node);
- }
- if (count) {
- return 0;
- } else if (start) {
- /*
- * we've gone to the end of the rbtree without finding any
- * clusters. start from the beginning and try again
- */
- start = 0;
- node = rb_first(&delayed_refs->root);
- goto again;
}
- return 1;
+ return done;
}
-/*
- * This checks to see if there are any delayed refs in the
- * btree for a given bytenr. It returns one if it finds any
- * and zero otherwise.
- *
- * If it only finds a head node, it returns 0.
- *
- * The idea is to use this when deciding if you can safely delete an
- * extent from the extent allocation tree. There may be a pending
- * ref in the rbtree that adds or removes references, so as long as this
- * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
- * allocation tree.
- */
-int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
{
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *prev_node;
- int ret = 0;
+ struct rb_node *node;
+ u64 seq = 0;
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
+ assert_spin_locked(&head->lock);
+ /*
+ * We don't have too many refs to merge in the case of delayed data
+ * refs.
+ */
+ if (head->is_data)
+ return;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ node = rb_first(&head->ref_root);
+ while (node) {
+ struct btrfs_delayed_ref_node *ref;
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref) {
- prev_node = rb_prev(&ref->rb_node);
- if (!prev_node)
- goto out;
- ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
rb_node);
- if (ref->bytenr == bytenr)
+ /* We can't merge refs that are outside of our seq count */
+ if (seq && ref->seq >= seq)
+ break;
+ if (merge_ref(trans, delayed_refs, head, ref, seq))
+ node = rb_first(&head->ref_root);
+ else
+ node = rb_next(&ref->rb_node);
+ }
+}
+
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ u64 seq)
+{
+ struct seq_list *elem;
+ int ret = 0;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ if (seq >= elem->seq) {
+ pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
+ (u32)(seq >> 32), (u32)seq,
+ (u32)(elem->seq >> 32), (u32)elem->seq,
+ delayed_refs);
ret = 1;
+ }
}
-out:
- spin_unlock(&delayed_refs->lock);
+
+ spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
-/*
- * helper function to lookup reference count
- *
- * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree. This way you
- * can check to see what the reference count would be if all of the
- * delayed refs are processed.
- */
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u32 *refs)
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
- u32 num_refs;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ struct btrfs_delayed_ref_head *head;
+ u64 start;
+ bool loop = false;
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
delayed_refs = &trans->transaction->delayed_refs;
+
again:
- ret = btrfs_search_slot(trans, root->fs_info->extent_root,
- &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- if (ret == 0) {
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- num_refs = btrfs_extent_refs(leaf, ei);
- } else {
- num_refs = 0;
- ret = 0;
+ start = delayed_refs->run_delayed_start;
+ head = find_ref_head(&delayed_refs->href_root, start, 1);
+ if (!head && !loop) {
+ delayed_refs->run_delayed_start = 0;
+ start = 0;
+ loop = true;
+ head = find_ref_head(&delayed_refs->href_root, start, 1);
+ if (!head)
+ return NULL;
+ } else if (!head && loop) {
+ return NULL;
}
- spin_lock(&delayed_refs->lock);
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref) {
- head = btrfs_delayed_node_to_head(ref);
- if (mutex_trylock(&head->mutex)) {
- num_refs += ref->ref_mod;
- mutex_unlock(&head->mutex);
- *refs = num_refs;
- goto out;
+ while (head->processing) {
+ struct rb_node *node;
+
+ node = rb_next(&head->href_node);
+ if (!node) {
+ if (loop)
+ return NULL;
+ delayed_refs->run_delayed_start = 0;
+ start = 0;
+ loop = true;
+ goto again;
}
-
- atomic_inc(&ref->refs);
- spin_unlock(&delayed_refs->lock);
-
- btrfs_release_path(root->fs_info->extent_root, path);
-
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(ref);
- goto again;
- } else {
- *refs = num_refs;
+ head = rb_entry(node, struct btrfs_delayed_ref_head,
+ href_node);
}
-out:
- spin_unlock(&delayed_refs->lock);
- btrfs_free_path(path);
- return ret;
+
+ head->processing = 1;
+ WARN_ON(delayed_refs->num_heads_ready == 0);
+ delayed_refs->num_heads_ready--;
+ delayed_refs->run_delayed_start = head->node.bytenr +
+ head->node.num_bytes;
+ return head;
}
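
A hedged userspace model of the wrap-around selection above: scan forward from run_delayed_start, skip heads already marked processing, and wrap to the beginning at most once. The sorted array is only a stand-in for the href rbtree; the behaviour is approximated, not copied.

#include <stdbool.h>
#include <stdio.h>

struct demo_head {
	unsigned long long bytenr;
	bool processing;
};

/* heads[] is assumed sorted by bytenr, like the href rbtree */
static int select_head(const struct demo_head *heads, int n,
		       unsigned long long start)
{
	/* first pass: the next unprocessed head at or after run_delayed_start */
	for (int i = 0; i < n; i++)
		if (heads[i].bytenr >= start && !heads[i].processing)
			return i;
	/* wrap around once and take anything not already being processed */
	for (int i = 0; i < n; i++)
		if (!heads[i].processing)
			return i;
	return -1;	/* nothing left to run */
}

int main(void)
{
	struct demo_head heads[] = {
		{ 4096, true }, { 8192, false }, { 12288, false },
	};

	printf("picked %d\n", select_head(heads, 3, 9000));   /* 2 */
	printf("picked %d\n", select_head(heads, 3, 20000));  /* 1, after wrap */
	return 0;
}
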
/*
@@ -335,19 +453,11 @@ out:
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update)
{
- struct btrfs_delayed_ref *existing_ref;
- struct btrfs_delayed_ref *ref;
-
- existing_ref = btrfs_delayed_node_to_ref(existing);
- ref = btrfs_delayed_node_to_ref(update);
-
- if (ref->pin)
- existing_ref->pin = 1;
-
- if (ref->action != existing_ref->action) {
+ if (update->action != existing->action) {
/*
* this is effectively undoing either an add or a
* drop. We decrement the ref_mod, and if it goes
@@ -355,28 +465,14 @@ update_existing_ref(struct btrfs_trans_handle *trans,
* ever changing the extent allocation tree.
*/
existing->ref_mod--;
- if (existing->ref_mod == 0) {
- rb_erase(&existing->rb_node,
- &delayed_refs->root);
- existing->in_tree = 0;
- btrfs_put_delayed_ref(existing);
- delayed_refs->num_entries--;
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
- }
+ if (existing->ref_mod == 0)
+ drop_delayed_ref(trans, delayed_refs, head, existing);
+ else
+ WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
} else {
- if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
- /* if we're adding refs, make sure all the
- * details match up. The extent could
- * have been totally freed and reallocated
- * by a different owner before the delayed
- * ref entries were removed.
- */
- existing_ref->owner_objectid = ref->owner_objectid;
- existing_ref->generation = ref->generation;
- existing_ref->root = ref->root;
- existing->num_bytes = update->num_bytes;
- }
+ WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
/*
* the action on the existing ref matches
* the action on the ref we're trying to add.
@@ -401,7 +497,9 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
existing_ref = btrfs_delayed_node_to_head(existing);
ref = btrfs_delayed_node_to_head(update);
+ BUG_ON(existing_ref->is_data != ref->is_data);
+ spin_lock(&existing_ref->lock);
if (ref->must_insert_reserved) {
/* if the extent was freed and then
* reallocated before the delayed ref
@@ -420,26 +518,45 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
}
+ if (ref->extent_op) {
+ if (!existing_ref->extent_op) {
+ existing_ref->extent_op = ref->extent_op;
+ } else {
+ if (ref->extent_op->update_key) {
+ memcpy(&existing_ref->extent_op->key,
+ &ref->extent_op->key,
+ sizeof(ref->extent_op->key));
+ existing_ref->extent_op->update_key = 1;
+ }
+ if (ref->extent_op->update_flags) {
+ existing_ref->extent_op->flags_to_set |=
+ ref->extent_op->flags_to_set;
+ existing_ref->extent_op->update_flags = 1;
+ }
+ btrfs_free_delayed_extent_op(ref->extent_op);
+ }
+ }
/*
- * update the reference mod on the head to reflect this new operation
+ * update the reference mod on the head to reflect this new operation,
+ * we only need the lock for this case because we could be processing it
+ * currently; for refs we just added we know we're a-ok.
*/
existing->ref_mod += update->ref_mod;
+ spin_unlock(&existing_ref->lock);
}
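
For clarity, a small stand-alone sketch of the extent_op merge performed above: OR together the requested flag updates and adopt the newer key update. The demo struct mirrors only the fields used here and is not the kernel's btrfs_delayed_extent_op.

#include <stdio.h>

struct demo_extent_op {
	unsigned long long flags_to_set;
	int update_flags;
	int update_key;
	unsigned long long key;	/* simplified stand-in for a btrfs key */
};

static void merge_extent_op(struct demo_extent_op *existing,
			    const struct demo_extent_op *incoming)
{
	if (incoming->update_key) {
		existing->key = incoming->key;	/* latest key update wins */
		existing->update_key = 1;
	}
	if (incoming->update_flags) {
		existing->flags_to_set |= incoming->flags_to_set;
		existing->update_flags = 1;
	}
}

int main(void)
{
	struct demo_extent_op a = { 0x1, 1, 0, 0 };
	struct demo_extent_op b = { 0x4, 1, 1, 42 };

	merge_extent_op(&a, &b);
	printf("flags=0x%llx key=%llu\n", a.flags_to_set, a.key);
	return 0;
}
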
/*
- * helper function to actually insert a delayed ref into the rbtree.
+ * helper function to actually insert a head node into the rbtree.
* this does all the dirty work in terms of maintaining the correct
- * overall modification count in the head node and properly dealing
- * with updating existing nodes as new modifications are queued.
+ * overall modification count.
*/
-static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
- u64 ref_generation, u64 owner_objectid, int action,
- int pin)
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, int action, int is_data)
{
- struct btrfs_delayed_ref_node *existing;
- struct btrfs_delayed_ref *full_ref;
+ struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
@@ -449,12 +566,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
* the head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
- if (parent == (u64)-1) {
- if (action == BTRFS_DROP_DELAYED_REF)
- count_mod = -1;
- else if (action == BTRFS_UPDATE_DELAYED_HEAD)
- count_mod = 0;
- }
+ if (action == BTRFS_UPDATE_DELAYED_HEAD)
+ count_mod = 0;
+ else if (action == BTRFS_DROP_DELAYED_REF)
+ count_mod = -1;
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,95 +582,213 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
*/
- if (action == BTRFS_ADD_DELAYED_EXTENT) {
+ if (action == BTRFS_ADD_DELAYED_EXTENT)
must_insert_reserved = 1;
- action = BTRFS_ADD_DELAYED_REF;
- } else {
+ else
must_insert_reserved = 0;
- }
-
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
- ref->parent = parent;
+ ref->num_bytes = num_bytes;
ref->ref_mod = count_mod;
+ ref->type = 0;
+ ref->action = 0;
+ ref->is_head = 1;
ref->in_tree = 1;
- ref->num_bytes = num_bytes;
+ ref->seq = 0;
- if (btrfs_delayed_ref_is_head(ref)) {
- head_ref = btrfs_delayed_node_to_head(ref);
- head_ref->must_insert_reserved = must_insert_reserved;
- INIT_LIST_HEAD(&head_ref->cluster);
- mutex_init(&head_ref->mutex);
+ head_ref = btrfs_delayed_node_to_head(ref);
+ head_ref->must_insert_reserved = must_insert_reserved;
+ head_ref->is_data = is_data;
+ head_ref->ref_root = RB_ROOT;
+ head_ref->processing = 0;
+
+ spin_lock_init(&head_ref->lock);
+ mutex_init(&head_ref->mutex);
+
+ trace_add_delayed_ref_head(ref, head_ref, action);
+
+ existing = htree_insert(&delayed_refs->href_root,
+ &head_ref->href_node);
+ if (existing) {
+ update_existing_head_ref(&existing->node, ref);
+ /*
+ * we've updated the existing ref, free the newly
+ * allocated ref
+ */
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+ head_ref = existing;
} else {
- full_ref = btrfs_delayed_node_to_ref(ref);
- full_ref->root = ref_root;
- full_ref->generation = ref_generation;
- full_ref->owner_objectid = owner_objectid;
- full_ref->pin = pin;
- full_ref->action = action;
+ delayed_refs->num_heads++;
+ delayed_refs->num_heads_ready++;
+ atomic_inc(&delayed_refs->num_entries);
+ trans->delayed_ref_updates++;
}
+ return head_ref;
+}
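
A short illustrative sketch (not kernel code) of the count_mod convention used above: the head's ref_mod ends up as the net of every queued modification, with +1 for an add, -1 for a drop and 0 for a head update.

#include <stdio.h>

enum demo_action { DEMO_ADD, DEMO_DROP, DEMO_UPDATE_HEAD };

static int count_mod(enum demo_action a)
{
	if (a == DEMO_DROP)
		return -1;
	if (a == DEMO_UPDATE_HEAD)
		return 0;
	return 1;
}

int main(void)
{
	enum demo_action queued[] = { DEMO_ADD, DEMO_ADD, DEMO_DROP,
				      DEMO_UPDATE_HEAD };
	int ref_mod = 0;

	for (unsigned i = 0; i < sizeof(queued) / sizeof(queued[0]); i++)
		ref_mod += count_mod(queued[i]);
	printf("net ref_mod on the head: %d\n", ref_mod);	/* prints 1 */
	return 0;
}
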
+
+/*
+ * helper to insert a delayed tree ref into the rbtree.
+ */
+static noinline void
+add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, u64 parent, u64 ref_root, int level,
+ int action, int no_quota)
+{
+ struct btrfs_delayed_ref_node *existing;
+ struct btrfs_delayed_tree_ref *full_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ u64 seq = 0;
- existing = tree_insert(&delayed_refs->root, bytenr,
- parent, &ref->rb_node);
+ if (action == BTRFS_ADD_DELAYED_EXTENT)
+ action = BTRFS_ADD_DELAYED_REF;
+
+ if (is_fstree(ref_root))
+ seq = atomic64_read(&fs_info->tree_mod_seq);
+ delayed_refs = &trans->transaction->delayed_refs;
+ /* first set the basic ref node struct up */
+ atomic_set(&ref->refs, 1);
+ ref->bytenr = bytenr;
+ ref->num_bytes = num_bytes;
+ ref->ref_mod = 1;
+ ref->action = action;
+ ref->is_head = 0;
+ ref->in_tree = 1;
+ ref->no_quota = no_quota;
+ ref->seq = seq;
+
+ full_ref = btrfs_delayed_node_to_tree_ref(ref);
+ full_ref->parent = parent;
+ full_ref->root = ref_root;
+ if (parent)
+ ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
+ else
+ ref->type = BTRFS_TREE_BLOCK_REF_KEY;
+ full_ref->level = level;
+
+ trace_add_delayed_tree_ref(ref, full_ref, action);
+
+ spin_lock(&head_ref->lock);
+ existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
if (existing) {
- if (btrfs_delayed_ref_is_head(ref))
- update_existing_head_ref(existing, ref);
- else
- update_existing_ref(trans, delayed_refs, existing, ref);
+ update_existing_ref(trans, delayed_refs, head_ref, existing,
+ ref);
+ /*
+ * we've updated the existing ref, free the newly
+ * allocated ref
+ */
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
+ } else {
+ atomic_inc(&delayed_refs->num_entries);
+ trans->delayed_ref_updates++;
+ }
+ spin_unlock(&head_ref->lock);
+}
+
+/*
+ * helper to insert a delayed data ref into the rbtree.
+ */
+static noinline void
+add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
+ u64 offset, int action, int no_quota)
+{
+ struct btrfs_delayed_ref_node *existing;
+ struct btrfs_delayed_data_ref *full_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ u64 seq = 0;
+
+ if (action == BTRFS_ADD_DELAYED_EXTENT)
+ action = BTRFS_ADD_DELAYED_REF;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+
+ if (is_fstree(ref_root))
+ seq = atomic64_read(&fs_info->tree_mod_seq);
+ /* first set the basic ref node struct up */
+ atomic_set(&ref->refs, 1);
+ ref->bytenr = bytenr;
+ ref->num_bytes = num_bytes;
+ ref->ref_mod = 1;
+ ref->action = action;
+ ref->is_head = 0;
+ ref->in_tree = 1;
+ ref->no_quota = no_quota;
+ ref->seq = seq;
+
+ full_ref = btrfs_delayed_node_to_data_ref(ref);
+ full_ref->parent = parent;
+ full_ref->root = ref_root;
+ if (parent)
+ ref->type = BTRFS_SHARED_DATA_REF_KEY;
+ else
+ ref->type = BTRFS_EXTENT_DATA_REF_KEY;
+
+ full_ref->objectid = owner;
+ full_ref->offset = offset;
+
+ trace_add_delayed_data_ref(ref, full_ref, action);
+
+ spin_lock(&head_ref->lock);
+ existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
+ if (existing) {
+ update_existing_ref(trans, delayed_refs, head_ref, existing,
+ ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
- kfree(ref);
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
} else {
- if (btrfs_delayed_ref_is_head(ref)) {
- delayed_refs->num_heads++;
- delayed_refs->num_heads_ready++;
- }
- delayed_refs->num_entries++;
+ atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
- return 0;
+ spin_unlock(&head_ref->lock);
}
/*
- * add a delayed ref to the tree. This does all of the accounting required
+ * add a delayed tree ref. This does all of the accounting required
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
- u64 ref_generation, u64 owner_objectid, int action,
- int pin)
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 ref_root, int level, int action,
+ struct btrfs_delayed_extent_op *extent_op,
+ int no_quota)
{
- struct btrfs_delayed_ref *ref;
+ struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+ no_quota = 0;
+
+ BUG_ON(extent_op && extent_op->is_data);
+ ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
- /*
- * the parent = 0 case comes from cases where we don't actually
- * know the parent yet. It will get updated later via a add/drop
- * pair.
- */
- if (parent == 0)
- parent = bytenr;
-
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+ head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
- kfree(ref);
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
return -ENOMEM;
}
+
+ head_ref->extent_op = extent_op;
+
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
@@ -563,84 +796,48 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
- (u64)-1, 0, 0, 0, action, pin);
- BUG_ON(ret);
-
- ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, ref_generation,
- owner_objectid, action, pin);
- BUG_ON(ret);
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ bytenr, num_bytes, action, 0);
-/*
- * this does a simple search for the head node for a given extent.
- * It must be called with the delayed ref spinlock held, and it returns
- * the head node if any where found, or NULL if not.
- */
-struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
-{
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_root *delayed_refs;
+ add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
+ num_bytes, parent, ref_root, level, action,
+ no_quota);
+ spin_unlock(&delayed_refs->lock);
- delayed_refs = &trans->transaction->delayed_refs;
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref)
- return btrfs_delayed_node_to_head(ref);
- return NULL;
+ return 0;
}
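
As a side note, the error path above follows the usual two-allocation unwind pattern: if the second allocation fails, the first must be released before returning -ENOMEM. A generic userspace sketch of that pattern, with invented type names:

#include <stdio.h>
#include <stdlib.h>

struct demo_ref  { int dummy; };
struct demo_head { int dummy; };

static int setup_pair(struct demo_ref **ref, struct demo_head **head)
{
	*ref = malloc(sizeof(**ref));
	if (!*ref)
		return -1;		/* -ENOMEM in the kernel code */
	*head = malloc(sizeof(**head));
	if (!*head) {
		free(*ref);		/* undo the first allocation */
		*ref = NULL;
		return -1;
	}
	return 0;
}

int main(void)
{
	struct demo_ref *ref;
	struct demo_head *head;

	if (setup_pair(&ref, &head) == 0) {
		printf("both objects allocated\n");
		free(head);
		free(ref);
	}
	return 0;
}
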
/*
- * add a delayed ref to the tree. This does all of the accounting required
- * to make sure the delayed ref is eventually processed before this
- * transaction commits.
- *
- * The main point of this call is to add and remove a backreference in a single
- * shot, taking the lock only once, and only searching for the head node once.
- *
- * It is the same as doing a ref add and delete in two separate calls.
+ * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
-int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 orig_parent,
- u64 parent, u64 orig_ref_root, u64 ref_root,
- u64 orig_ref_generation, u64 ref_generation,
- u64 owner_objectid, int pin)
+int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes,
+ u64 parent, u64 ref_root,
+ u64 owner, u64 offset, int action,
+ struct btrfs_delayed_extent_op *extent_op,
+ int no_quota)
{
- struct btrfs_delayed_ref *ref;
- struct btrfs_delayed_ref *old_ref;
+ struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+ no_quota = 0;
+
+ BUG_ON(extent_op && !extent_op->is_data);
+ ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
- old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
- if (!old_ref) {
- kfree(ref);
+ head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+ if (!head_ref) {
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
return -ENOMEM;
}
- /*
- * the parent = 0 case comes from cases where we don't actually
- * know the parent yet. It will get updated later via a add/drop
- * pair.
- */
- if (parent == 0)
- parent = bytenr;
- if (orig_parent == 0)
- orig_parent = bytenr;
+ head_ref->extent_op = extent_op;
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref) {
- kfree(ref);
- kfree(old_ref);
- return -ENOMEM;
- }
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
@@ -648,21 +845,100 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
- (u64)-1, 0, 0, 0,
- BTRFS_UPDATE_DELAYED_HEAD, 0);
- BUG_ON(ret);
-
- ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, ref_generation,
- owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
- BUG_ON(ret);
-
- ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
- orig_parent, orig_ref_root,
- orig_ref_generation, owner_objectid,
- BTRFS_DROP_DELAYED_REF, pin);
- BUG_ON(ret);
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ bytenr, num_bytes, action, 1);
+
+ add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
+ num_bytes, parent, ref_root, owner, offset,
+ action, no_quota);
spin_unlock(&delayed_refs->lock);
+
+ return 0;
+}
+
+int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes,
+ struct btrfs_delayed_extent_op *extent_op)
+{
+ struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+ if (!head_ref)
+ return -ENOMEM;
+
+ head_ref->extent_op = extent_op;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+ extent_op->is_data);
+
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+}
+
+/*
+ * this does a simple search for the head node for a given extent.
+ * It must be called with the delayed ref spinlock held, and it returns
+ * the head node if one was found, or NULL if not.
+ */
+struct btrfs_delayed_ref_head *
+btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ return find_ref_head(&delayed_refs->href_root, bytenr, 0);
+}
+
+void btrfs_delayed_ref_exit(void)
+{
+ if (btrfs_delayed_ref_head_cachep)
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+ if (btrfs_delayed_tree_ref_cachep)
+ kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
+ if (btrfs_delayed_data_ref_cachep)
+ kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
+ if (btrfs_delayed_extent_op_cachep)
+ kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
+}
+
+int btrfs_delayed_ref_init(void)
+{
+ btrfs_delayed_ref_head_cachep = kmem_cache_create(
+ "btrfs_delayed_ref_head",
+ sizeof(struct btrfs_delayed_ref_head), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_delayed_ref_head_cachep)
+ goto fail;
+
+ btrfs_delayed_tree_ref_cachep = kmem_cache_create(
+ "btrfs_delayed_tree_ref",
+ sizeof(struct btrfs_delayed_tree_ref), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_delayed_tree_ref_cachep)
+ goto fail;
+
+ btrfs_delayed_data_ref_cachep = kmem_cache_create(
+ "btrfs_delayed_data_ref",
+ sizeof(struct btrfs_delayed_data_ref), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_delayed_data_ref_cachep)
+ goto fail;
+
+ btrfs_delayed_extent_op_cachep = kmem_cache_create(
+ "btrfs_delayed_extent_op",
+ sizeof(struct btrfs_delayed_extent_op), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_delayed_extent_op_cachep)
+ goto fail;
+
return 0;
+fail:
+ btrfs_delayed_ref_exit();
+ return -ENOMEM;
}