diff options
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 12 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 585 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 182 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 29 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 1496 | ||||
-rw-r--r-- | fs/btrfs/file.c | 6 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 54 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 3 | ||||
-rw-r--r-- | fs/btrfs/tree-defrag.c | 2 |
11 files changed, 1234 insertions, 1140 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d2cf5a54a4b..9adf5e4f7e9 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ - compression.o + compression.o delayed-ref.o else # Normal Makefile diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 87c90387283..bebc9fd1666 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -922,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, child->start, + child->len, mid->start, child->start, root->root_key.objectid, trans->transid, level - 1); @@ -2075,7 +2076,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, lower->start, - lower->start, c->start, + lower->len, lower->start, c->start, root->root_key.objectid, trans->transid, level - 1); BUG_ON(ret); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3a37ba7a8d6..ced5fd85dc3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -688,8 +688,6 @@ struct btrfs_fs_info { struct rb_root block_group_cache_tree; struct extent_io_tree pinned_extents; - struct extent_io_tree pending_del; - struct extent_io_tree extent_ins; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; @@ -717,7 +715,6 @@ struct btrfs_fs_info { struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; - struct mutex extent_ins_mutex; struct mutex pinned_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; @@ -1704,18 +1701,15 @@ static inline struct dentry *fdentry(struct file *file) } /* extent-tree.c */ +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, unsigned long count); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs); int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 bytenr); -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, @@ -1777,7 +1771,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner_objectid); int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, + struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 orig_parent, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner_objectid); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 00000000000..874565a1f63 --- /dev/null +++ b/fs/btrfs/delayed-ref.c @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2009 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/sched.h> +#include <linux/sort.h> +#include <linux/ftrace.h> +#include "ctree.h" +#include "delayed-ref.h" +#include "transaction.h" + +/* + * delayed back reference update tracking. For subvolume trees + * we queue up extent allocations and backref maintenance for + * delayed processing. This avoids deep call chains where we + * add extents in the middle of btrfs_search_slot, and it allows + * us to buffer up frequently modified backrefs in an rb tree instead + * of hammering updates on the extent allocation tree. + * + * Right now this code is only used for reference counted trees, but + * the long term goal is to get rid of the similar code for delayed + * extent tree modifications. + */ + +/* + * entries in the rb tree are ordered by the byte number of the extent + * and by the byte number of the parent block. + */ +static int comp_entry(struct btrfs_delayed_ref_node *ref, + u64 bytenr, u64 parent) +{ + if (bytenr < ref->bytenr) + return -1; + if (bytenr > ref->bytenr) + return 1; + if (parent < ref->parent) + return -1; + if (parent > ref->parent) + return 1; + return 0; +} + +/* + * insert a new ref into the rbtree. This returns any existing refs + * for the same (bytenr,parent) tuple, or NULL if the new node was properly + * inserted. + */ +static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, + u64 bytenr, u64 parent, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_delayed_ref_node *entry; + int cmp; + + while (*p) { + parent_node = *p; + entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, + rb_node); + + cmp = comp_entry(entry, bytenr, parent); + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else + return entry; + } + + entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + rb_link_node(node, parent_node, p); + rb_insert_color(node, root); + return NULL; +} + +/* + * find an entry based on (bytenr,parent). This returns the delayed + * ref if it was able to find one, or NULL if nothing was in that spot + */ +static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, + u64 bytenr, u64 parent) +{ + struct rb_node *n = root->rb_node; + struct btrfs_delayed_ref_node *entry; + int cmp; + + while (n) { + entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); + WARN_ON(!entry->in_tree); + + cmp = comp_entry(entry, bytenr, parent); + if (cmp < 0) + n = n->rb_left; + else if (cmp > 0) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +/* + * Locking on delayed refs is done by taking a lock on the head node, + * which has the (impossible) parent id of (u64)-1. Once a lock is held + * on the head node, you're allowed (and required) to process all the + * delayed refs for a given byte number in the tree. + * + * This will walk forward in the rbtree until it finds a head node it + * is able to lock. It might not lock the delayed ref you asked for, + * and so it will return the one it did lock in next_ret and return 0. + * + * If no locks are taken, next_ret is set to null and 1 is returned. This + * means there are no more unlocked head nodes in the rbtree. + */ +int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head **next_ret) +{ + struct rb_node *node; + struct btrfs_delayed_ref_head *head; + int ret = 0; + + while (1) { + if (btrfs_delayed_ref_is_head(ref)) { + head = btrfs_delayed_node_to_head(ref); + if (mutex_trylock(&head->mutex)) { + *next_ret = head; + ret = 0; + break; + } + } + node = rb_next(&ref->rb_node); + if (!node) { + ret = 1; + *next_ret = NULL; + break; + } + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + } + return ret; +} + +/* + * This checks to see if there are any delayed refs in the + * btree for a given bytenr. It returns one if it finds any + * and zero otherwise. + * + * If it only finds a head node, it returns 0. + * + * The idea is to use this when deciding if you can safely delete an + * extent from the extent allocation tree. There may be a pending + * ref in the rbtree that adds or removes references, so as long as this + * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent + * allocation tree. + */ +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) +{ + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_root *delayed_refs; + struct rb_node *prev_node; + int ret = 0; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); + if (ref) { + prev_node = rb_prev(&ref->rb_node); + if (!prev_node) + goto out; + ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr == bytenr) + ret = 1; + } +out: + spin_unlock(&delayed_refs->lock); + return ret; +} + +/* + * helper function to lookup reference count + * + * the head node for delayed ref is used to store the sum of all the + * reference count modifications queued up in the rbtree. This way you + * can check to see what the reference count would be if all of the + * delayed refs are processed. + */ +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) +{ + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u32 num_refs; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + delayed_refs = &trans->transaction->delayed_refs; +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret < 0) + goto out; + + if (ret == 0) { + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + num_refs = btrfs_extent_refs(leaf, ei); + } else { + num_refs = 0; + ret = 0; + } + + spin_lock(&delayed_refs->lock); + ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); + if (ref) { + head = btrfs_delayed_node_to_head(ref); + if (mutex_trylock(&head->mutex)) { + num_refs += ref->ref_mod; + mutex_unlock(&head->mutex); + *refs = num_refs; + goto out; + } + + atomic_inc(&ref->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(root->fs_info->extent_root, path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(ref); + goto again; + } else { + *refs = num_refs; + } +out: + spin_unlock(&delayed_refs->lock); + btrfs_free_path(path); + return ret; +} + +/* + * helper function to update an extent delayed ref in the + * rbtree. existing and update must both have the same + * bytenr and parent + * + * This may free existing if the update cancels out whatever + * operation it was doing. + */ +static noinline void +update_existing_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *existing, + struct btrfs_delayed_ref_node *update) +{ + struct btrfs_delayed_ref *existing_ref; + struct btrfs_delayed_ref *ref; + + existing_ref = btrfs_delayed_node_to_ref(existing); + ref = btrfs_delayed_node_to_ref(update); + + if (ref->pin) + existing_ref->pin = 1; + + if (ref->action != existing_ref->action) { + /* + * this is effectively undoing either an add or a + * drop. We decrement the ref_mod, and if it goes + * down to zero we just delete the entry without + * every changing the extent allocation tree. + */ + existing->ref_mod--; + if (existing->ref_mod == 0) { + rb_erase(&existing->rb_node, + &delayed_refs->root); + existing->in_tree = 0; + btrfs_put_delayed_ref(existing); + delayed_refs->num_entries--; + if (trans->delayed_ref_updates) + trans->delayed_ref_updates--; + } + } else { + if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { + /* if we're adding refs, make sure all the + * details match up. The extent could + * have been totally freed and reallocated + * by a different owner before the delayed + * ref entries were removed. + */ + existing_ref->owner_objectid = ref->owner_objectid; + existing_ref->generation = ref->generation; + existing_ref->root = ref->root; + existing->num_bytes = update->num_bytes; + } + /* + * the action on the existing ref matches + * the action on the ref we're trying to add. + * Bump the ref_mod by one so the backref that + * is eventually added/removed has the correct + * reference count + */ + existing->ref_mod += update->ref_mod; + } +} + +/* + * helper function to update the accounting in the head ref + * existing and update must have the same bytenr + */ +static noinline void +update_existing_head_ref(struct btrfs_delayed_ref_node *existing, + struct btrfs_delayed_ref_node *update) +{ + struct btrfs_delayed_ref_head *existing_ref; + struct btrfs_delayed_ref_head *ref; + + existing_ref = btrfs_delayed_node_to_head(existing); + ref = btrfs_delayed_node_to_head(update); + + if (ref->must_insert_reserved) { + /* if the extent was freed and then + * reallocated before the delayed ref + * entries were processed, we can end up + * with an existing head ref without + * the must_insert_reserved flag set. + * Set it again here + */ + existing_ref->must_insert_reserved = ref->must_insert_reserved; + + /* + * update the num_bytes so we make sure the accounting + * is done correctly + */ + existing->num_bytes = update->num_bytes; + + } + + /* + * update the reference mod on the head to reflect this new operation + */ + existing->ref_mod += update->ref_mod; +} + +/* + * helper function to actually insert a delayed ref into the rbtree. + * this does all the dirty work in terms of maintaining the correct + * overall modification count in the head node and properly dealing + * with updating existing nodes as new modifications are queued. + */ +static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *ref, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin) +{ + struct btrfs_delayed_ref_node *existing; + struct btrfs_delayed_ref *full_ref; + struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_root *delayed_refs; + int count_mod = 1; + int must_insert_reserved = 0; + + /* + * the head node stores the sum of all the mods, so dropping a ref + * should drop the sum in the head node by one. + */ + if (parent == (u64)-1 && action == BTRFS_DROP_DELAYED_REF) + count_mod = -1; + + /* + * BTRFS_ADD_DELAYED_EXTENT means that we need to update + * the reserved accounting when the extent is finally added, or + * if a later modification deletes the delayed ref without ever + * inserting the extent into the extent allocation tree. + * ref->must_insert_reserved is the flag used to record + * that accounting mods are required. + * + * Once we record must_insert_reserved, switch the action to + * BTRFS_ADD_DELAYED_REF because other special casing is not required. + */ + if (action == BTRFS_ADD_DELAYED_EXTENT) { + must_insert_reserved = 1; + action = BTRFS_ADD_DELAYED_REF; + } else { + must_insert_reserved = 0; + } + + + delayed_refs = &trans->transaction->delayed_refs; + + /* first set the basic ref node struct up */ + atomic_set(&ref->refs, 1); + ref->bytenr = bytenr; + ref->parent = parent; + ref->ref_mod = count_mod; + ref->in_tree = 1; + ref->num_bytes = num_bytes; + + if (btrfs_delayed_ref_is_head(ref)) { + head_ref = btrfs_delayed_node_to_head(ref); + head_ref->must_insert_reserved = must_insert_reserved; + mutex_init(&head_ref->mutex); + } else { + full_ref = btrfs_delayed_node_to_ref(ref); + full_ref->root = ref_root; + full_ref->generation = ref_generation; + full_ref->owner_objectid = owner_objectid; + full_ref->pin = pin; + full_ref->action = action; + } + + existing = tree_insert(&delayed_refs->root, bytenr, + parent, &ref->rb_node); + + if (existing) { + if (btrfs_delayed_ref_is_head(ref)) + update_existing_head_ref(existing, ref); + else + update_existing_ref(trans, delayed_refs, existing, ref); + + /* + * we've updated the existing ref, free the newly + * allocated ref + */ + kfree(ref); + } else { + delayed_refs->num_entries++; + trans->delayed_ref_updates++; + } + return 0; +} + +/* + * add a delayed ref to the tree. This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + */ +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin) +{ + struct btrfs_delayed_ref *ref; + struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + ref = kmalloc(sizeof(*ref), GFP_NOFS); + if (!ref) + return -ENOMEM; + + /* + * the parent = 0 case comes from cases where we don't actually + * know the parent yet. It will get updated later via a add/drop + * pair. + */ + if (parent == 0) + parent = bytenr; + + head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); + if (!head_ref) { + kfree(ref); + return -ENOMEM; + } + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + /* + * insert both the head node and the new ref without dropping + * the spin lock + */ + ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, + (u64)-1, 0, 0, 0, action, pin); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, + parent, ref_root, ref_generation, + owner_objectid, action, pin); + BUG_ON(ret); + spin_unlock(&delayed_refs->lock); + return 0; +} + +/* + * add a delayed ref to the tree. This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + * + * The main point of this call is to add and remove a backreference in a single + * shot, taking the lock only once, and only searching for the head node once. + * + * It is the same as doing a ref add and delete in two separate calls. + */ +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 orig_parent, + u64 parent, u64 orig_ref_root, u64 ref_root, + u64 orig_ref_generation, u64 ref_generation, + u64 owner_objectid, int pin) +{ + struct btrfs_delayed_ref *ref; + struct btrfs_delayed_ref *old_ref; + struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + ref = kmalloc(sizeof(*ref), GFP_NOFS); + if (!ref) + return -ENOMEM; + + old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); + if (!old_ref) { + kfree(ref); + return -ENOMEM; + } + + /* + * the parent = 0 case comes from cases where we don't actually + * know the parent yet. It will get updated later via a add/drop + * pair. + */ + if (parent == 0) + parent = bytenr; + if (orig_parent == 0) + orig_parent = bytenr; + + head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); + if (!head_ref) { + kfree(ref); + kfree(old_ref); + return -ENOMEM; + } + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + /* + * insert both the head node and the new ref without dropping + * the spin lock + */ + ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, + (u64)-1, 0, 0, 0, + BTRFS_ADD_DELAYED_REF, 0); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, + parent, ref_root, ref_generation, + owner_objectid, BTRFS_ADD_DELAYED_REF, 0); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, + orig_parent, orig_ref_root, + orig_ref_generation, owner_objectid, + BTRFS_DROP_DELAYED_REF, pin); + BUG_ON(ret); + spin_unlock(&delayed_refs->lock); + return 0; +} diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 00000000000..37919e5c007 --- /dev/null +++ b/fs/btrfs/delayed-ref.h @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef __DELAYED_REF__ +#define __DELAYED_REF__ + +/* these are the possible values of struct btrfs_delayed_ref->action */ +#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ +#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ +#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ + +struct btrfs_delayed_ref_node { + struct rb_node rb_node; + + /* the starting bytenr of the extent */ + u64 bytenr; + + /* the parent our backref will point to */ + u64 parent; + + /* the size of the extent */ + u64 num_bytes; + + /* ref count on this data structure */ + atomic_t refs; + + /* + * how many refs is this entry adding or deleting. For + * head refs, this may be a negative number because it is keeping + * track of the total mods done to the reference count. + * For individual refs, this will always be a positive number + * + * It may be more than one, since it is possible for a single + * parent to have more than one ref on an extent + */ + int ref_mod; + + /* is this node still in the rbtree? */ + unsigned int in_tree:1; +}; + +/* + * the head refs are used to hold a lock on a given extent, which allows us + * to make sure that only one process is running the delayed refs + * at a time for a single extent. They also store the sum of all the + * reference count modifications we've queued up. + */ +struct btrfs_delayed_ref_head { + struct btrfs_delayed_ref_node node; + + /* + * the mutex is held while running the refs, and it is also + * held when checking the sum of reference modifications. + */ + struct mutex mutex; + + /* + * when a new extent is allocated, it is just reserved in memory + * The actual extent isn't inserted into the extent allocation tree + * until the delayed ref is processed. must_insert_reserved is + * used to flag a delayed ref so the accounting can be updated + * when a full insert is done. + * + * It is possible the extent will be freed before it is ever + * inserted into the extent allocation tree. In this case + * we need to update the in ram accounting to properly reflect + * the free has happened. + */ + unsigned int must_insert_reserved:1; +}; + +struct btrfs_delayed_ref { + struct btrfs_delayed_ref_node node; + + /* the root objectid our ref will point to */ + u64 root; + + /* the generation for the backref */ + u64 generation; + + /* owner_objectid of the backref */ + u64 owner_objectid; + + /* operation done by this entry in the rbtree */ + u8 action; + + /* if pin == 1, when the extent is freed it will be pinned until + * transaction commit + */ + unsigned int pin:1; +}; + +struct btrfs_delayed_ref_root { + struct rb_root root; + + /* this spin lock protects the rbtree and the entries inside */ + spinlock_t lock; + + /* how many delayed ref updates we've queued, used by the + * throttling code + */ + unsigned long num_entries; + + /* + * set when the tree is flushing before a transaction commit, + * used by the throttling code to decide if new updates need + * to be run right away + */ + int flushing; +}; + +static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) +{ + WARN_ON(atomic_read(&ref->refs) == 0); + if (atomic_dec_and_test(&ref->refs)) { + WARN_ON(ref->in_tree); + kfree(ref); + } +} + +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin); + +struct btrfs_delayed_ref * +btrfs_find_delayed_ref(struct btrfs_trans_handle *trans, u64 bytenr, + u64 parent); +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); +int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head **next_ret); +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs); +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 orig_parent, + u64 parent, u64 orig_ref_root, u64 ref_root, + u64 orig_ref_generation, u64 ref_generation, + u64 owner_objectid, int pin); +/* + * a node might live in a head or a regular ref, this lets you + * test for the proper type to use. + */ +static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) +{ + return node->parent == (u64)-1; +} + +/* + * helper functions to cast a node into its container + */ +static inline struct btrfs_delayed_ref * +btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) +{ + WARN_ON(btrfs_delayed_ref_is_head(node)); + return container_of(node, struct btrfs_delayed_ref, node); + +} + +static inline struct btrfs_delayed_ref_head * +btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) +{ + WARN_ON(!btrfs_delayed_ref_is_head(node)); + return container_of(node, struct btrfs_delayed_ref_head, node); + +} +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e18175248e..4f43e227a29 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1458,6 +1458,7 @@ static int transaction_kthread(void *arg) struct btrfs_root *root = arg; struct btrfs_trans_handle *trans; struct btrfs_transaction *cur; + struct btrfs_fs_info *info = root->fs_info; unsigned long now; unsigned long delay; int ret; @@ -1471,12 +1472,6 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { - printk(KERN_INFO "btrfs: total reference cache " - "size %llu\n", - root->fs_info->total_ref_cache_size); - } - mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { @@ -1486,13 +1481,30 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { + unsigned long num_delayed; + num_delayed = cur->delayed_refs.num_entries; mutex_unlock(&root->fs_info->trans_mutex); delay = HZ * 5; + + /* + * we may have been woken up early to start + * processing the delayed extent ref updates + * If so, run some of them and then loop around again + * to see if we need to force a commit + */ + if (num_delayed > 64) { + mutex_unlock(&info->transaction_kthread_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_run_delayed_refs(trans, root, 256); + btrfs_end_transaction(trans, root); + continue; + } goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); + sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); @@ -1611,10 +1623,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->pending_del, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->extent_ins, - fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; INIT_LIST_HEAD(&fs_info->dead_reloc_roots); @@ -1629,7 +1637,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); - mutex_init(&fs_info->extent_ins_mutex); mutex_init(&fs_info->pinned_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fefe83ad205..9b5da2b013e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,10 +49,13 @@ struct pending_extent_op { int del; }; -static int finish_current_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all); -static int del_pending_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all); +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, struct btrfs_key *ins, + int ref_mod); +static int update_reserved_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int reserve); static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int is_data); @@ -60,6 +63,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, int pin, + int ref_to_drop); static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, @@ -554,262 +563,13 @@ out: return ret; } -/* - * updates all the backrefs that are pending on update_list for the - * extent_root - */ -static noinline int update_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_path *path, - struct list_head *update_list) -{ - struct btrfs_key key; - struct btrfs_extent_ref *ref; - struct btrfs_fs_info *info = extent_root->fs_info; - struct pending_extent_op *op; - struct extent_buffer *leaf; - int ret = 0; - struct list_head *cur = update_list->next; - u64 ref_objectid; - u64 ref_root = extent_root->root_key.objectid; - - op = list_entry(cur, struct pending_extent_op, list); - -search: - key.objectid = op->bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - key.offset = op->orig_parent; - - ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); - BUG_ON(ret); - - leaf = path->nodes[0]; - -loop: - ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - - ref_objectid = btrfs_ref_objectid(leaf, ref); - - if (btrfs_ref_root(leaf, ref) != ref_root || - btrfs_ref_generation(leaf, ref) != op->orig_generation || - (ref_objectid != op->level && - ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { - printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " - "root %llu, owner %u\n", - (unsigned long long)op->bytenr, - (unsigned long long)op->orig_parent, - (unsigned long long)ref_root, op->level); - btrfs_print_leaf(extent_root, leaf); - BUG(); - } - |