aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c5986
1 files changed, 5986 insertions, 0 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
new file mode 100644
index 00000000000..293da650873
--- /dev/null
+++ b/fs/btrfs/extent-tree.c
@@ -0,0 +1,5986 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/version.h>
+#include "compat.h"
+#include "hash.h"
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "locking.h"
+#include "ref-cache.h"
+#include "compat.h"
+
+#define PENDING_EXTENT_INSERT 0
+#define PENDING_EXTENT_DELETE 1
+#define PENDING_BACKREF_UPDATE 2
+
+struct pending_extent_op {
+ int type;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 parent;
+ u64 orig_parent;
+ u64 generation;
+ u64 orig_generation;
+ int level;
+ struct list_head list;
+ int del;
+};
+
+static int finish_current_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root, int all);
+static int del_pending_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root, int all);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int is_data);
+static int update_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int alloc,
+ int mark_free);
+
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+ return (cache->flags & bits) == bits;
+}
+
+/*
+ * this adds the block group to the fs_info rb tree for the block group
+ * cache
+ */
+static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct btrfs_block_group_cache *cache;
+
+ spin_lock(&info->block_group_cache_lock);
+ p = &info->block_group_cache_tree.rb_node;
+
+ while (*p) {
+ parent = *p;
+ cache = rb_entry(parent, struct btrfs_block_group_cache,
+ cache_node);
+ if (block_group->key.objectid < cache->key.objectid) {
+ p = &(*p)->rb_left;
+ } else if (block_group->key.objectid > cache->key.objectid) {
+ p = &(*p)->rb_right;
+ } else {
+ spin_unlock(&info->block_group_cache_lock);
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&block_group->cache_node, parent, p);
+ rb_insert_color(&block_group->cache_node,
+ &info->block_group_cache_tree);
+ spin_unlock(&info->block_group_cache_lock);
+
+ return 0;
+}
+
+/*
+ * This will return the block group at or after bytenr if contains is 0, else
+ * it will return the block group that contains the bytenr
+ */
+static struct btrfs_block_group_cache *
+block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
+ int contains)
+{
+ struct btrfs_block_group_cache *cache, *ret = NULL;
+ struct rb_node *n;
+ u64 end, start;
+
+ spin_lock(&info->block_group_cache_lock);
+ n = info->block_group_cache_tree.rb_node;
+
+ while (n) {
+ cache = rb_entry(n, struct btrfs_block_group_cache,
+ cache_node);
+ end = cache->key.objectid + cache->key.offset - 1;
+ start = cache->key.objectid;
+
+ if (bytenr < start) {
+ if (!contains && (!ret || start < ret->key.objectid))
+ ret = cache;
+ n = n->rb_left;
+ } else if (bytenr > start) {
+ if (contains && bytenr <= end) {
+ ret = cache;
+ break;
+ }
+ n = n->rb_right;
+ } else {
+ ret = cache;
+ break;
+ }
+ }
+ if (ret)
+ atomic_inc(&ret->count);
+ spin_unlock(&info->block_group_cache_lock);
+
+ return ret;
+}
+
+/*
+ * this is only called by cache_block_group, since we could have freed extents
+ * we need to check the pinned_extents for any extents that can't be used yet
+ * since their free space will be released as soon as the transaction commits.
+ */
+static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end)
+{
+ u64 extent_start, extent_end, size;
+ int ret;
+
+ mutex_lock(&info->pinned_mutex);
+ while (start < end) {
+ ret = find_first_extent_bit(&info->pinned_extents, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY);
+ if (ret)
+ break;
+
+ if (extent_start == start) {
+ start = extent_end + 1;
+ } else if (extent_start > start && extent_start < end) {
+ size = extent_start - start;
+ ret = btrfs_add_free_space(block_group, start,
+ size);
+ BUG_ON(ret);
+ start = extent_end + 1;
+ } else {
+ break;
+ }
+ }
+
+ if (start < end) {
+ size = end - start;
+ ret = btrfs_add_free_space(block_group, start, size);
+ BUG_ON(ret);
+ }
+ mutex_unlock(&info->pinned_mutex);
+
+ return 0;
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 bytenr;
+ u64 *logical;
+ int stripe_len;
+ int i, nr, ret;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr, 0,
+ &logical, &nr, &stripe_len);
+ BUG_ON(ret);
+ while (nr--) {
+ btrfs_remove_free_space(cache, logical[nr],
+ stripe_len);
+ }
+ kfree(logical);
+ }
+ return 0;
+}
+
+static int cache_block_group(struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_path *path;
+ int ret = 0;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ int slot;
+ u64 last;
+
+ if (!block_group)
+ return 0;
+
+ root = root->fs_info->extent_root;
+
+ if (block_group->cached)
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->reada = 2;
+ /*
+ * we get into deadlocks with paths held by callers of this function.
+ * since the alloc_mutex is protecting things right now, just
+ * skip the locking here
+ */
+ path->skip_locking = 1;
+ last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ key.objectid = last;
+ key.offset = 0;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto err;
+
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto err;
+ if (ret == 0)
+ continue;
+ else
+ break;
+ }
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid < block_group->key.objectid)
+ goto next;
+
+ if (key.objectid >= block_group->key.objectid +
+ block_group->key.offset)
+ break;
+
+ if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+ add_new_free_space(block_group, root->fs_info, last,
+ key.objectid);
+
+ last = key.objectid + key.offset;
+ }
+next:
+ path->slots[0]++;
+ }
+
+ add_new_free_space(block_group, root->fs_info, last,
+ block_group->key.objectid +
+ block_group->key.offset);
+
+ remove_sb_from_cache(root, block_group);
+ block_group->cached = 1;
+ ret = 0;
+err:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * return the block group that starts at or after bytenr
+ */
+static struct btrfs_block_group_cache *
+btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = block_group_cache_tree_search(info, bytenr, 0);
+
+ return cache;
+}
+
+/*
+ * return the block group that contains teh given bytenr
+ */
+struct btrfs_block_group_cache *btrfs_lookup_block_group(
+ struct btrfs_fs_info *info,
+ u64 bytenr)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = block_group_cache_tree_search(info, bytenr, 1);
+
+ return cache;
+}
+
+static inline void put_block_group(struct btrfs_block_group_cache *cache)
+{
+ if (atomic_dec_and_test(&cache->count))
+ kfree(cache);
+}
+
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+ u64 flags)
+{
+ struct list_head *head = &info->space_info;
+ struct list_head *cur;
+ struct btrfs_space_info *found;
+ list_for_each(cur, head) {
+ found = list_entry(cur, struct btrfs_space_info, list);
+ if (found->flags == flags)
+ return found;
+ }
+ return NULL;
+}
+
+static u64 div_factor(u64 num, int factor)
+{
+ if (factor == 10)
+ return num;
+ num *= factor;
+ do_div(num, 10);
+ return num;
+}
+
+u64 btrfs_find_block_group(struct btrfs_root *root,
+ u64 search_start, u64 search_hint, int owner)
+{
+ struct btrfs_block_group_cache *cache;
+ u64 used;
+ u64 last = max(search_hint, search_start);
+ u64 group_start = 0;
+ int full_search = 0;
+ int factor = 9;
+ int wrapped = 0;
+again:
+ while (1) {
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ if (!cache)
+ break;
+
+ spin_lock(&cache->lock);
+ last = cache->key.objectid + cache->key.offset;
+ used = btrfs_block_group_used(&cache->item);
+
+ if ((full_search || !cache->ro) &&
+ block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
+ if (used + cache->pinned + cache->reserved <
+ div_factor(cache->key.offset, factor)) {
+ group_start = cache->key.objectid;
+ spin_unlock(&cache->lock);
+ put_block_group(cache);
+ goto found;
+ }
+ }
+ spin_unlock(&cache->lock);
+ put_block_group(cache);
+ cond_resched();
+ }
+ if (!wrapped) {
+ last = search_start;
+ wrapped = 1;
+ goto again;
+ }
+ if (!full_search && factor < 10) {
+ last = search_start;
+ full_search = 1;
+ factor = 10;
+ goto again;
+ }
+found:
+ return group_start;
+}
+
+/* simple helper to search for an existing extent at a given offset */
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
+{
+ int ret;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ key.objectid = start;
+ key.offset = len;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
+ 0, 0);
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Back reference rules. Back refs have three main goals:
+ *
+ * 1) differentiate between all holders of references to an extent so that
+ * when a reference is dropped we can make sure it was a valid reference
+ * before freeing the extent.
+ *
+ * 2) Provide enough information to quickly find the holders of an extent
+ * if we notice a given block is corrupted or bad.
+ *
+ * 3) Make it easy to migrate blocks for FS shrinking or storage pool
+ * maintenance. This is actually the same as #2, but with a slightly
+ * different use case.
+ *
+ * File extents can be referenced by:
+ *
+ * - multiple snapshots, subvolumes, or different generations in one subvol
+ * - different files inside a single subvolume
+ * - different offsets inside a file (bookend extents in file.c)
+ *
+ * The extent ref structure has fields for:
+ *
+ * - Objectid of the subvolume root
+ * - Generation number of the tree holding the reference
+ * - objectid of the file holding the reference
+ * - number of references holding by parent node (alway 1 for tree blocks)
+ *
+ * Btree leaf may hold multiple references to a file extent. In most cases,
+ * these references are from same file and the corresponding offsets inside
+ * the file are close together.
+ *
+ * When a file extent is allocated the fields are filled in:
+ * (root_key.objectid, trans->transid, inode objectid, 1)
+ *
+ * When a leaf is cow'd new references are added for every file extent found
+ * in the leaf. It looks similar to the create case, but trans->transid will
+ * be different when the block is cow'd.
+ *
+ * (root_key.objectid, trans->transid, inode objectid,
+ * number of references in the leaf)
+ *
+ * When a file extent is removed either during snapshot deletion or
+ * file truncation, we find the corresponding back reference and check
+ * the following fields:
+ *
+ * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
+ * inode objectid)
+ *
+ * Btree extents can be referenced by:
+ *
+ * - Different subvolumes
+ * - Different generations of the same subvolume
+ *
+ * When a tree block is created, back references are inserted:
+ *
+ * (root->root_key.objectid, trans->transid, level, 1)
+ *
+ * When a tree block is cow'd, new back references are added for all the
+ * blocks it points to. If the tree block isn't in reference counted root,
+ * the old back references are removed. These new back references are of
+ * the form (trans->transid will have increased since creation):
+ *
+ * (root->root_key.objectid, trans->transid, level, 1)
+ *
+ * When a backref is in deleting, the following fields are checked:
+ *
+ * if backref was for a tree root:
+ * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
+ * else
+ * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
+ *
+ * Back Reference Key composing:
+ *
+ * The key objectid corresponds to the first byte in the extent, the key
+ * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
+ * byte of parent extent. If a extent is tree root, the key offset is set
+ * to the key objectid.
+ */
+
+static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 parent,
+ u64 ref_root, u64 ref_generation,
+ u64 owner_objectid, int del)
+{
+ struct btrfs_key key;
+ struct btrfs_extent_ref *ref;
+ struct extent_buffer *leaf;
+ u64 ref_objectid;
+ int ret;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_REF_KEY;
+ key.offset = parent;
+
+ ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+ ref_objectid = btrfs_ref_objectid(leaf, ref);
+ if (btrfs_ref_root(leaf, ref) != ref_root ||
+ btrfs_ref_generation(leaf, ref) != ref_generation ||
+ (ref_objectid != owner_objectid &&
+ ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
+ ret = -EIO;
+ WARN_ON(1);
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * updates all the backrefs that are pending on update_list for the
+ * extent_root
+ */
+static noinline int update_backrefs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root,
+ struct btrfs_path *path,
+ struct list_head *update_list)
+{
+ struct btrfs_key key;
+ struct btrfs_extent_ref *ref;
+ struct btrfs_fs_info *info = extent_root->fs_info;
+ struct pending_extent_op *op;
+ struct extent_buffer *leaf;
+ int ret = 0;
+ struct list_head *cur = update_list->next;
+ u64 ref_objectid;
+ u64 ref_root = extent_root->root_key.objectid;
+
+ op = list_entry(cur, struct pending_extent_op, list);
+
+search:
+ key.objectid = op->bytenr;
+ key.type = BTRFS_EXTENT_REF_KEY;
+ key.offset = op->orig_parent;
+
+ ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
+ BUG_ON(ret);
+
+ leaf = path->nodes[0];
+
+loop:
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+
+ ref_objectid = btrfs_ref_objectid(leaf, ref);
+
+ if (btrfs_ref_root(leaf, ref) != ref_root ||
+ btrfs_ref_generation(leaf, ref) != op->orig_generation ||
+ (ref_objectid != op->level &&
+ ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
+ printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
+ "root %llu, owner %u\n",
+ (unsigned long long)op->bytenr,
+ (unsigned long long)op->orig_parent,
+ (unsigned long long)ref_root, op->level);
+ btrfs_print_leaf(extent_root, leaf);
+ BUG();
+ }
+
+ key.objectid = op->bytenr;
+ key.offset = op->parent;
+ key.type = BTRFS_EXTENT_REF_KEY;
+ ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
+ BUG_ON(ret);
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+ btrfs_set_ref_generation(leaf, ref, op->generation);
+
+ cur = cur->next;
+
+ list_del_init(&op->list);
+ unlock_extent(&info->extent_ins, op->bytenr,
+ op->bytenr + op->num_bytes - 1, GFP_NOFS);
+ kfree(op);
+
+ if (cur == update_list) {
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_release_path(extent_root, path);
+ goto out;
+ }
+
+ op = list_entry(cur, struct pending_extent_op, list);
+
+ path->slots[0]++;
+ while (path->slots[0] < btrfs_header_nritems(leaf)) {
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid == op->bytenr &&
+ key.type == BTRFS_EXTENT_REF_KEY)
+ goto loop;
+ path->slots[0]++;
+ }
+
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_release_path(extent_root, path);
+ goto search;
+
+out:
+ return 0;
+}
+
+static noinline int insert_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root,
+ struct btrfs_path *path,
+ struct list_head *insert_list, int nr)
+{
+ struct btrfs_key *keys;
+ u32 *data_size;
+ struct pending_extent_op *op;
+ struct extent_buffer *leaf;
+ struct list_head *cur = insert_list->next;
+ struct btrfs_fs_info *info = extent_root->fs_info;
+ u64 ref_root = extent_root->root_key.objectid;
+ int i = 0, last = 0, ret;
+ int total = nr * 2;
+
+ if (!nr)
+ return 0;
+
+ keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
+ if (!keys)
+ return -ENOMEM;
+
+ data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
+ if (!data_size) {
+ kfree(keys);
+ return -ENOMEM;
+ }
+
+ list_for_each_entry(op, insert_list, list) {
+ keys[i].objectid = op->bytenr;
+ keys[i].offset = op->num_bytes;
+ keys[i].type = BTRFS_EXTENT_ITEM_KEY;
+ data_size[i] = sizeof(struct btrfs_extent_item);
+ i++;
+
+ keys[i].objectid = op->bytenr;
+ keys[i].offset = op->parent;
+ keys[i].type = BTRFS_EXTENT_REF_KEY;
+ data_size[i] = sizeof(struct btrfs_extent_ref);
+ i++;
+ }
+
+ op = list_entry(cur, struct pending_extent_op, list);
+ i = 0;
+ while (i < total) {
+ int c;
+ ret = btrfs_insert_some_items(trans, extent_root, path,
+ keys+i, data_size+i, total-i);
+ BUG_ON(ret < 0);
+
+ if (last && ret > 1)
+ BUG();
+
+ leaf = path->nodes[0];
+ for (c = 0; c < ret; c++) {
+ int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
+
+ /*
+ * if the first item we inserted was a backref, then
+ * the EXTENT_ITEM will be the odd c's, else it will
+ * be the even c's
+ */
+ if ((ref_first && (c % 2)) ||
+ (!ref_first && !(c % 2))) {
+ struct btrfs_extent_item *itm;
+
+ itm = btrfs_item_ptr(leaf, path->slots[0] + c,
+ struct btrfs_extent_item);
+ btrfs_set_extent_refs(path->nodes[0], itm, 1);
+ op->del++;
+ } else {
+ struct btrfs_extent_ref *ref;
+
+ ref = btrfs_item_ptr(leaf, path->slots[0] + c,
+ struct btrfs_extent_ref);
+ btrfs_set_ref_root(leaf, ref, ref_root);
+ btrfs_set_ref_generation(leaf, ref,
+ op->generation);
+ btrfs_set_ref_objectid(leaf, ref, op->level);
+ btrfs_set_ref_num_refs(leaf, ref, 1);
+ op->del++;
+ }
+
+ /*
+ * using del to see when its ok to free up the
+ * pending_extent_op. In the case where we insert the
+ * last item on the list in order to help do batching
+ * we need to not free the extent op until we actually
+ * insert the extent_item
+ */
+ if (op->del == 2) {
+ unlock_extent(&info->extent_ins, op->bytenr,
+ op->bytenr + op->num_bytes - 1,
+ GFP_NOFS);
+ cur = cur->next;
+ list_del_init(&op->list);
+ kfree(op);
+ if (cur != insert_list)
+ op = list_entry(cur,
+ struct pending_extent_op,
+ list);
+ }
+ }
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(extent_root, path);
+
+ /*
+ * Ok backref's and items usually go right next to eachother,
+ * but if we could only insert 1 item that means that we
+ * inserted on the end of a leaf, and we have no idea what may
+ * be on the next leaf so we just play it safe. In order to
+ * try and help this case we insert the last thing on our
+ * insert list so hopefully it will end up being the last
+ * thing on the leaf and everything else will be before it,
+ * which will let us insert a whole bunch of items at the same
+ * time.
+ */
+ if (ret == 1 && !last && (i + ret < total)) {
+ /*
+ * last: where we will pick up the next time around
+ * i: our current key to insert, will be total - 1
+ * cur: the current op we are screwing with
+ * op: duh
+ */
+ last = i + ret;
+ i = total - 1;
+ cur = insert_list->prev;
+ op = list_entry(cur, struct pending_extent_op, list);
+ } else if (last) {
+ /*
+ * ok we successfully inserted the last item on the
+ * list, lets reset everything
+ *
+ * i: our current key to insert, so where we left off
+ * last time
+ * last: done with this
+ * cur: the op we are messing with
+ * op: duh
+ * total: since we inserted the last key, we need to
+ * decrement total so we dont overflow
+ */
+ i = last;
+ last = 0;
+ total--;
+ if (i < total) {
+ cur = insert_list->next;
+ op = list_entry(cur, struct pending_extent_op,
+ list);
+ }
+ } else {
+ i += ret;
+ }
+
+ cond_resched();
+ }
+ ret = 0;
+ kfree(keys);
+ kfree(data_size);
+ return ret;
+}
+
+static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 parent,
+ u64 ref_root, u64 ref_generation,
+ u64 owner_objectid)
+{
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_extent_ref *ref;
+ u32 num_refs;
+ int ret;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_REF_KEY;
+ key.offset = parent;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
+ if (ret == 0) {
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_ref);
+ btrfs_set_ref_root(leaf, ref, ref_root);
+ btrfs_set_ref_generation(leaf, ref, ref_generation);
+ btrfs_set_ref_objectid(leaf, ref, owner_objectid);
+ btrfs_set_ref_num_refs(leaf, ref, 1);
+ } else if (ret == -EEXIST) {
+ u64 existing_owner;
+ BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_ref);
+ if (btrfs_ref_root(leaf, ref) != ref_root ||
+ btrfs_ref_generation(leaf, ref) != ref_generation) {
+ ret = -EIO;
+ WARN_ON(1);
+ goto out;
+ }
+
+ num_refs = btrfs_ref_num_refs(leaf, ref);
+ BUG_ON(num_refs == 0);
+ btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
+
+ existing_owner = btrfs_ref_objectid(leaf, ref);
+ if (existing_owner != owner_objectid &&
+ existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
+ btrfs_set_ref_objectid(leaf, ref,
+ BTRFS_MULTIPLE_OBJECTIDS);
+ }
+ ret = 0;
+ } else {
+ goto out;
+ }
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+ btrfs_release_path(root, path);
+ return ret;
+}
+
+static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_extent_ref *ref;
+ u32 num_refs;
+ int ret = 0;
+
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+ num_refs = btrfs_ref_num_refs(leaf, ref);
+ BUG_ON(num_refs == 0);
+ num_refs -= 1;
+ if (num_refs == 0) {
+ ret = btrfs_del_item(trans, root, path);
+ } else {
+ btrfs_set_ref_num_refs(leaf, ref, num_refs);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ btrfs_release_path(root, path);
+ return ret;
+}
+
+#ifdef BIO_RW_DISCARD
+static void btrfs_issue_discard(struct block_device *bdev,
+ u64 start, u64 len)
+{
+ blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+}
+#endif
+
+static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+#ifdef BIO_RW_DISCARD
+ int ret;
+ u64 map_length = num_bytes;
+ struct btrfs_multi_bio *multi = NULL;
+
+ /* Tell the block device(s) that the sectors can be discarded */
+ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+ bytenr, &map_length, &multi, 0);
+ if (!ret) {
+ struct btrfs_bio_stripe *stripe = multi->stripes;
+ int i;
+
+ if (map_length > num_bytes)
+ map_length = num_bytes;
+
+ for (i = 0; i < multi->num_stripes; i++, stripe++) {
+ btrfs_issue_discard(stripe->dev->bdev,
+ stripe->physical,
+ map_length);
+ }
+ kfree(multi);
+ }
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+static noinline int free_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root,
+ struct list_head *del_list)
+{
+ struct btrfs_fs_info *info = extent_root->fs_info;
+ struct btrfs_path *path;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ struct list_head *cur;
+ struct pending_extent_op *op;
+ struct btrfs_extent_item *ei;
+ int ret, num_to_del, extent_slot = 0, found_extent = 0;
+ u32 refs;
+ u64 bytes_freed = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = 1;
+
+search:
+ /* search for the backref for the current ref we want to delete */
+ cur = del_list->next;
+ op = list_entry(cur, struct pending_extent_op, list);
+ ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
+ op->orig_parent,
+ extent_root->root_key.objectid,
+ op->orig_generation, op->level, 1);
+ if (ret) {
+ printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
+ "root %llu gen %llu owner %u\n",
+ (unsigned long long)op->bytenr,
+ (unsigned long long)extent_root->root_key.objectid,
+ (unsigned long long)op->orig_generation, op->level);
+ btrfs_print_leaf(extent_root, path->nodes[0]);
+ WARN_ON(1);
+ goto out;
+ }
+
+ extent_slot = path->slots[0];
+ num_to_del = 1;
+ found_extent = 0;
+
+ /*
+ * if we aren't the first item on the leaf we can move back one and see
+ * if our ref is right next to our extent item
+ */
+ if (likely(extent_slot)) {
+ extent_slot--;
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+ extent_slot);
+ if (found_key.objectid == op->bytenr &&
+ found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ found_key.offset == op->num_bytes) {
+ num_to_del++;
+ found_extent = 1;
+ }
+ }
+
+ /*
+ * if we didn't find the extent we need to delete the backref and then
+ * search for the extent item key so we can update its ref count
+ */
+ if (!found_extent) {
+ key.objectid = op->bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = op->num_bytes;
+
+ ret = remove_extent_backref(trans, extent_root, path);
+ BUG_ON(ret);
+ btrfs_release_path(extent_root, path);
+ ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
+ BUG_ON(ret);
+ extent_slot = path->slots[0];
+ }
+
+ /* this is where we update the ref count for the extent */
+ leaf = path->nodes[0];
+ ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
+ refs = btrfs_extent_refs(leaf, ei);
+ BUG_ON(refs == 0);
+ refs--;
+ btrfs_set_extent_refs(leaf, ei, refs);
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ /*
+ * This extent needs deleting. The reason cur_slot is extent_slot +
+ * num_to_del is because extent_slot points to the slot where the extent
+ * is, and if the backref was not right next to the extent we will be
+ * deleting at least 1 item, and will want to start searching at the
+ * slot directly next to extent_slot. However if we did find the
+ * backref next to the extent item them we will be deleting at least 2
+ * items and will want to start searching directly after the ref slot
+ */
+ if (!refs) {
+ struct list_head *pos, *n, *end;
+ int cur_slot = extent_slot+num_to_del;
+ u64 super_used;
+ u64 root_used;
+
+ path->slots[0] = extent_slot;
+ bytes_freed = op->num_bytes;
+
+ mutex_lock(&info->pinned_mutex);
+ ret = pin_down_bytes(trans, extent_root, op->bytenr,
+ op->num_bytes, op->level >=
+ BTRFS_FIRST_FREE_OBJECTID);
+ mutex_unlock(&info->pinned_mutex);
+ BUG_ON(ret < 0);
+ op->del = ret;
+
+ /*
+ * we need to see if we can delete multiple things at once, so
+ * start looping through the list of extents we are wanting to
+ * delete and see if their extent/backref's are right next to
+ * eachother and the extents only have 1 ref
+ */
+ for (pos = cur->next; pos != del_list; pos = pos->next) {
+ struct pending_extent_op *tmp;
+
+ tmp = list_entry(pos, struct pending_extent_op, list);
+
+ /* we only want to delete extent+ref at this stage */
+ if (cur_slot >= btrfs_header_nritems(leaf) - 1)
+ break;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
+ if (found_key.objectid != tmp->bytenr ||
+ found_key.type != BTRFS_EXTENT_ITEM_KEY ||
+ found_key.offset != tmp->num_bytes)
+ break;
+
+ /* check to make sure this extent only has one ref */
+ ei = btrfs_item_ptr(leaf, cur_slot,
+ struct btrfs_extent_item);
+ if (btrfs_extent_refs(leaf, ei) != 1)
+ break;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
+ if (found_key.objectid != tmp->bytenr ||
+ found_key.type != BTRFS_EXTENT_REF_KEY ||
+ found_key.offset != tmp->orig_parent)
+ break;
+
+ /*
+ * the ref is right next to the extent, we can set the
+ * ref count to 0 since we will delete them both now
+ */
+ btrfs_set_extent_refs(leaf, ei, 0);
+
+ /* pin down the bytes for this extent */
+ mutex_lock(&info->pinned_mutex);
+ ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
+ tmp->num_bytes, tmp->level >=
+ BTRFS_FIRST_FREE_OBJECTID);
+ mutex_unlock(&info->pinned_mutex);
+ BUG_ON(ret < 0);
+
+ /*
+ * use the del field to tell if we need to go ahead and
+ * free up the extent when we delete the item or not.
+ */
+ tmp->del = ret;
+ bytes_freed += tmp->num_bytes;
+
+ num_to_del += 2;
+ cur_slot += 2;
+ }
+ end = pos;
+
+ /* update the free space counters */
+ spin_lock(&info->delalloc_lock);
+ super_used = btrfs_super_bytes_used(&info->super_copy);
+ btrfs_set_super_bytes_used(&info->super_copy,
+ super_used - bytes_freed);
+
+ root_used = btrfs_root_used(&extent_root->root_item);
+ btrfs_set_root_used(&extent_root->root_item,
+ root_used - bytes_freed);
+ spin_unlock(&info->delalloc_lock);
+
+ /* delete the items */
+ ret = btrfs_del_items(trans, extent_root, path,
+ path->slots[0], num_to_del);
+ BUG_ON(ret);
+
+ /*
+ * loop through the extents we deleted and do the cleanup work
+ * on them
+ */
+ for (pos = cur, n = pos->next; pos != end;
+ pos = n, n = pos->next) {
+ struct pending_extent_op *tmp;
+ tmp = list_entry(pos, struct pending_extent_op, list);
+
+ /*
+ * remember tmp->del tells us wether or not we pinned
+ * down the extent
+ */
+ ret = update_block_group(trans, extent_root,
+ tmp->bytenr, tmp->num_bytes, 0,
+ tmp->del);
+ BUG_ON(ret);
+
+ list_del_init(&tmp->list);
+ unlock_extent(&info->extent_ins, tmp->bytenr,
+ tmp->bytenr + tmp->num_bytes - 1,
+ GFP_NOFS);
+ kfree(tmp);
+ }
+ } else if (refs && found_extent) {
+ /*
+ * the ref and extent were right next to eachother, but the
+ * extent still has a ref, so just free the backref and keep
+ * going
+ */
+ ret = remove_extent_backref(trans, extent_root, path);
+ BUG_ON(ret);
+
+ list_del_init(&op->list);
+ unlock_extent(&info->extent_ins, op->bytenr,
+ op->bytenr + op->num_bytes - 1, GFP_NOFS);
+ kfree(op);
+ } else {
+ /*
+ * the extent has multiple refs and the backref we were looking
+ * for was not right next to it, so just unlock and go next,
+ * we're good to go
+ */
+ list_del_init(&op->list);
+ unlock_extent(&info->extent_ins, op->bytenr,
+ op->bytenr + op->num_bytes - 1, GFP_NOFS);
+ kfree(op);
+ }
+
+ btrfs_release_path(extent_root, path);
+ if (!list_empty(del_list))
+ goto search;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 orig_parent, u64 parent,
+ u64 orig_root, u64 ref_root,
+ u64 orig_generation, u64 ref_generation,
+ u64 owner_objectid)
+{
+ int ret;
+ struct btrfs_root *extent_root = root->fs_info->extent_root;
+ struct btrfs_path *path;
+
+ if (root == root->fs_info->extent_root) {
+ struct pending_extent_op *extent_op;
+ u64 num_bytes;
+
+ BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
+ num_bytes = btrfs_level_size(root, (int)owner_objectid);
+ mutex_lock(&root-