aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/TODO20
-rw-r--r--fs/btrfs/async-thread.c10
-rw-r--r--fs/btrfs/async-thread.h7
-rw-r--r--fs/btrfs/bit-radix.c130
-rw-r--r--fs/btrfs/bit-radix.h33
-rw-r--r--fs/btrfs/btrfs_inode.h54
-rw-r--r--fs/btrfs/crc32c.h18
-rw-r--r--fs/btrfs/ctree.c127
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/dir-item.c41
-rw-r--r--fs/btrfs/disk-io.c33
-rw-r--r--fs/btrfs/extent_io.c34
-rw-r--r--fs/btrfs/extent_map.c10
-rw-r--r--fs/btrfs/file.c44
-rw-r--r--fs/btrfs/inode.c189
-rw-r--r--fs/btrfs/locking.c13
-rw-r--r--fs/btrfs/ordered-data.c19
-rw-r--r--fs/btrfs/ref-cache.c26
-rw-r--r--fs/btrfs/ref-cache.h3
-rw-r--r--fs/btrfs/root-tree.c21
-rw-r--r--fs/btrfs/struct-funcs.c21
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/transaction.c67
-rw-r--r--fs/btrfs/tree-defrag.c4
25 files changed, 653 insertions, 277 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d5c28557fba..48b7909ca8d 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
file-item.o inode-item.o inode-map.o disk-io.o \
- transaction.o bit-radix.o inode.o file.o tree-defrag.o \
+ transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
ref-cache.o export.o tree-log.o acl.o free-space-cache.o
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
deleted file mode 100644
index d9b6d38c603..00000000000
--- a/fs/btrfs/TODO
+++ /dev/null
@@ -1,20 +0,0 @@
-* cleanup, add more error checking, get rid of BUG_ONs
-* Fix ENOSPC handling
-* Make allocator smarter
-* add a block group to struct inode
-* Do actual block accounting
-* Check compat and incompat flags on the inode
-* Get rid of struct ctree_path, limiting tree levels held at one time
-* Add generation number to key pointer in nodes
-* Add generation number to inode
-* forbid cross subvolume renames and hardlinks
-* Release
-* Do real tree locking
-* Add extent mirroring (backup copies of blocks)
-* Add fancy interface to get access to incremental backups
-* Add fancy striped extents to make big reads faster
-* Use relocation to try and fix write errors
-* Make allocator much smarter
-* xattrs (directory streams for regular files)
-* Scrub & defrag
-
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 4e780b279de..04fb9702d14 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
/*
* if we pick a busy task, move the task to the end of the list.
- * hopefully this will keep things somewhat evenly balanced
+ * hopefully this will keep things somewhat evenly balanced.
+ * Do the move in batches based on the sequence number. This groups
+ * requests submitted at roughly the same time onto the same worker.
*/
next = workers->worker_list.next;
worker = list_entry(next, struct btrfs_worker_thread, worker_list);
atomic_inc(&worker->num_pending);
worker->sequence++;
+
if (worker->sequence % workers->idle_thresh == 0)
list_move_tail(next, &workers->worker_list);
return worker;
}
+/*
+ * selects a worker thread to take the next job. This will either find
+ * an idle worker, start a new worker up to the max count, or just return
+ * one of the existing busy workers.
+ */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
struct btrfs_worker_thread *worker;
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 43e44d115dd..4ec9a2ee0f9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -63,14 +63,17 @@ struct btrfs_workers {
/* once a worker has this many requests or fewer, it is idle */
int idle_thresh;
- /* list with all the work threads */
+ /* list with all the work threads. The workers on the idle thread
+ * may be actively servicing jobs, but they haven't yet hit the
+ * idle thresh limit above.
+ */
struct list_head worker_list;
struct list_head idle_list;
/* lock for finding the next worker thread to queue on */
spinlock_t lock;
- /* extra name for this worker */
+ /* extra name for this worker, used for current->name */
char *name;
};
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c
deleted file mode 100644
index e8bf876db39..00000000000
--- a/fs/btrfs/bit-radix.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "bit-radix.h"
-
-#define BIT_ARRAY_BYTES 256
-#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
-
-extern struct kmem_cache *btrfs_bit_radix_cachep;
-int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
- unsigned long *bits;
- unsigned long slot;
- int bit_slot;
- int ret;
-
- slot = bit / BIT_RADIX_BITS_PER_ARRAY;
- bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
- bits = radix_tree_lookup(radix, slot);
- if (!bits) {
- bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
- if (!bits)
- return -ENOMEM;
- memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
- bits[0] = slot;
- ret = radix_tree_insert(radix, slot, bits);
- if (ret)
- return ret;
- }
- ret = test_and_set_bit(bit_slot, bits + 1);
- if (ret < 0)
- ret = 1;
- return ret;
-}
-
-int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
- unsigned long *bits;
- unsigned long slot;
- int bit_slot;
-
- slot = bit / BIT_RADIX_BITS_PER_ARRAY;
- bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
- bits = radix_tree_lookup(radix, slot);
- if (!bits)
- return 0;
- return test_bit(bit_slot, bits + 1);
-}
-
-int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
- unsigned long *bits;
- unsigned long slot;
- int bit_slot;
- int i;
- int empty = 1;
-
- slot = bit / BIT_RADIX_BITS_PER_ARRAY;
- bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
- bits = radix_tree_lookup(radix, slot);
- if (!bits)
- return 0;
- clear_bit(bit_slot, bits + 1);
- for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
- if (bits[i]) {
- empty = 0;
- break;
- }
- }
- if (empty) {
- bits = radix_tree_delete(radix, slot);
- BUG_ON(!bits);
- kmem_cache_free(btrfs_bit_radix_cachep, bits);
- }
- return 0;
-}
-
-int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
- unsigned long start, int nr)
-{
- unsigned long *bits;
- unsigned long *gang[4];
- int found;
- int ret;
- int i;
- int total_found = 0;
- unsigned long slot;
-
- slot = start / BIT_RADIX_BITS_PER_ARRAY;
- ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
- ARRAY_SIZE(gang));
- found = start % BIT_RADIX_BITS_PER_ARRAY;
- for (i = 0; i < ret && nr > 0; i++) {
- bits = gang[i];
- while(nr > 0) {
- found = find_next_bit(bits + 1,
- BIT_RADIX_BITS_PER_ARRAY,
- found);
- if (found < BIT_RADIX_BITS_PER_ARRAY) {
- *retbits = bits[0] *
- BIT_RADIX_BITS_PER_ARRAY + found;
- retbits++;
- nr--;
- total_found++;
- found++;
- } else
- break;
- }
- found = 0;
- }
- return total_found;
-}
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h
deleted file mode 100644
index c100f54d5c3..00000000000
--- a/fs/btrfs/bit-radix.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BIT_RADIX__
-#define __BIT_RADIX__
-#include <linux/radix-tree.h>
-
-int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
- unsigned long start, int nr);
-
-static inline void init_bit_radix(struct radix_tree_root *radix)
-{
- INIT_RADIX_TREE(radix, GFP_NOFS);
-}
-#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0577fda2168..0b2e623cf42 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -25,27 +25,58 @@
/* in memory btrfs inode */
struct btrfs_inode {
+ /* which subvolume this inode belongs to */
struct btrfs_root *root;
+
+ /* the block group preferred for allocations. This pointer is buggy
+ * and needs to be replaced with a bytenr instead
+ */
struct btrfs_block_group_cache *block_group;
+
+ /* key used to find this inode on disk. This is used by the code
+ * to read in roots of subvolumes
+ */
struct btrfs_key location;
+
+ /* the extent_tree has caches of all the extent mappings to disk */
struct extent_map_tree extent_tree;
+
+ /* the io_tree does range state (DIRTY, LOCKED etc) */
struct extent_io_tree io_tree;
+
+ /* special utility tree used to record which mirrors have already been
+ * tried when checksums fail for a given block
+ */
struct extent_io_tree io_failure_tree;
+
+ /* held while inserting checksums to avoid races */
struct mutex csum_mutex;
+
+ /* held while inesrting or deleting extents from files */
struct mutex extent_mutex;
+
+ /* held while logging the inode in tree-log.c */
struct mutex log_mutex;
- struct inode vfs_inode;
+
+ /* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
+ /* standard acl pointers */
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
/* for keeping track of orphaned inodes */
struct list_head i_orphan;
+ /* list of all the delalloc inodes in the FS. There are times we need
+ * to write all the delalloc pages to disk, and this list is used
+ * to walk them all.
+ */
struct list_head delalloc_inodes;
- /* full 64 bit generation number */
+ /* full 64 bit generation number, struct vfs_inode doesn't have a big
+ * enough field for this.
+ */
u64 generation;
/*
@@ -57,10 +88,25 @@ struct btrfs_inode {
*/
u64 logged_trans;
- /* trans that last made a change that should be fully fsync'd */
+ /*
+ * trans that last made a change that should be fully fsync'd. This
+ * gets reset to zero each time the inode is logged
+ */
u64 log_dirty_trans;
+
+ /* total number of bytes pending delalloc, used by stat to calc the
+ * real block usage of the file
+ */
u64 delalloc_bytes;
+
+ /*
+ * the size of the file stored in the metadata on disk. data=ordered
+ * means the in-memory i_size might be larger than the size on disk
+ * because not all the blocks are written yet.
+ */
u64 disk_i_size;
+
+ /* flags field from the on disk inode */
u32 flags;
/*
@@ -68,6 +114,8 @@ struct btrfs_inode {
* number for new files that are created
*/
u64 index_cnt;
+
+ struct inode vfs_inode;
};
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
index 4f0fefed132..1eaf11d334f 100644
--- a/fs/btrfs/crc32c.h
+++ b/fs/btrfs/crc32c.h
@@ -1,3 +1,21 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
#ifndef __BTRFS_CRC32C__
#define __BTRFS_CRC32C__
#include <asm/byteorder.h>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 50e81f43e6d..ff3261ff2e1 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2007,2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void)
return path;
}
+/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
btrfs_release_path(NULL, p);
kmem_cache_free(btrfs_path_cachep, p);
}
+/*
+ * path release drops references on the extent buffers in the path
+ * and it drops any locks held by this path
+ *
+ * It is safe to call this on paths that no locks or extent buffers held.
+ */
void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
{
int i;
@@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
}
}
+/*
+ * safely gets a reference on the root node of a tree. A lock
+ * is not taken, so a concurrent writer may put a different node
+ * at the root of the tree. See btrfs_lock_root_node for the
+ * looping required.
+ *
+ * The extent buffer returned by this has a reference taken, so
+ * it won't disappear. It may stop being the root of the tree
+ * at any time because there are no locks held.
+ */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
@@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
return eb;
}
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root. A locked buffer
+ * is returned, with a reference held.
+ */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
@@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
return eb;
}
+/* cowonly root (everything not a reference counted cow subvolume), just get
+ * put onto a simple dirty list. transaction.c walks this to make sure they
+ * get properly updated on disk.
+ */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
if (root->track_dirty && list_empty(&root->dirty_list)) {
@@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
}
}
+/*
+ * used by snapshot creation to make a copy of a root for a tree with
+ * a given objectid. The buffer with the new root node is returned in
+ * cow_ret, and this func returns zero on success or a negative error code.
+ */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
@@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
return 0;
}
+/*
+ * does the dirty work in cow of a single block. The parent block
+ * (if supplied) is updated to point to the new cow copy. The new
+ * buffer is marked dirty and returned locked. If you modify the block
+ * it needs to be marked dirty again.
+ *
+ * search_start -- an allocation hint for the new block
+ *
+ * empty_size -- a hint that you plan on doing more cow. This is the size in bytes
+ * the allocator should try to find free next to the block it returns. This is
+ * just a hint and may be ignored by the allocator.
+ *
+ * prealloc_dest -- if you have already reserved a destination for the cow,
+ * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent
+ * is used to finish the allocation.
+ */
int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
@@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
return 0;
}
+/*
+ * cows a single block, see __btrfs_cow_block for the real work.
+ * This version of it has extra checks so that a block isn't cow'd more than
+ * once per transaction, as long as it hasn't been written yet
+ */
int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
@@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * helper function for defrag to decide if two blocks pointed to by a
+ * node are actually close by
+ */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
if (blocknr < other && other - (blocknr + blocksize) < 32768)
@@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
}
+/*
+ * this is used by the defrag code to go through all the
+ * leaves pointed to by a node and reallocate them so that
+ * disk order is close to key order
+ */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
@@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
return btrfs_item_offset_nr(leaf, nr - 1);
}
+/*
+ * extra debugging checks to make sure all the items in a key are
+ * well formed and in the proper order
+ */
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
int level)
{
@@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path,
return 0;
}
+/*
+ * extra checking to make sure all the items in a leaf are
+ * well formed and in the proper order
+ */
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
int level)
{
@@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return -1;
}
+/* given a node and slot number, this reads the blocks it points to. The
+ * extent buffer is returned with a reference taken (but unlocked).
+ * NULL is returned on error.
+ */
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
struct extent_buffer *parent, int slot)
{
@@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
btrfs_node_ptr_generation(parent, slot));
}
+/*
+ * node level balancing, used to make sure nodes are in proper order for
+ * item deletion. We balance from the top down, so we have to make sure
+ * that a deletion won't leave an node completely empty later on.
+ */
static noinline int balance_level(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level)
@@ -1024,7 +1101,10 @@ enospc:
return ret;
}
-/* returns zero if the push worked, non-zero otherwise */
+/* Node balancing for insertion. Here we only split or push nodes around
+ * when they are completely full. This is also done top down, so we
+ * have to be pessimistic.
+ */
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level)
@@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
}
/*
- * readahead one full node of leaves
+ * readahead one full node of leaves, finding things that are close
+ * to the block in 'slot', and triggering ra on them.
*/
static noinline void reada_for_search(struct btrfs_root *root,
struct btrfs_path *path,
@@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root,
}
}
+/*
+ * when we walk down the tree, it is usually safe to unlock the higher layers in
+ * the tree. The exceptions are when our path goes through slot 0, because operations
+ * on the tree might require changing key pointers higher up in the tree.
+ *
+ * callers might also have set path->keep_locks, which tells this code to
+ * keep the lock if the path points to the last slot in the block. This is
+ * part of walking through the tree, and selecting the next slot in the higher
+ * block.
+ *
+ * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
+ * so if lowest_unlock is 1, level 0 won't be unlocked
+ */
static noinline void unlock_up(struct btrfs_path *path, int level,
int lowest_unlock)
{
@@ -2705,6 +2799,12 @@ again:
return ret;
}
+/*
+ * make the item pointed to by the path smaller. new_size indicates
+ * how small to make it, and from_end tells us if we just chop bytes
+ * off the end of the item or if we shift the item to chop bytes off
+ * the front.
+ */
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * make the item pointed to by the path bigger, data_size is the new size.
+ */
int btrfs_extend_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u32 data_size)
@@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
}
/*
- * Given a key and some data, insert an item into the tree.
+ * Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
*/
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
/*
* delete the pointer from a given node.
*
- * If the delete empties a node, the node is removed from the tree,
- * continuing all the way the root if required. The root is converted into
- * a leaf if all the nodes are emptied.
+ * the tree should have been previously balanced so the deletion does not
+ * empty a node.
*/
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot)
@@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* search the tree again to find a leaf with lesser keys
* returns 0 if it found something or 1 if there are no lesser leaves.
* returns < 0 on io errors.
+ *
+ * This may release the path, and so you may lose any locks held at the
+ * time you call it.
*/
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
@@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are either in cache or have a minimum
- * transaction id. This is used by the btree defrag code, but could
- * also be used to search for blocks that have changed since a given
- * transaction id.
+ * transaction id. This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
@@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* This honors path->lowest_level to prevent descent past a given level
* of the tree.
*
+ * min_trans indicates the oldest transaction that you are interested
+ * in walking through. Any nodes or leaves older than min_trans are
+ * skipped over (without reading them).
+ *
* returns zero if something useful was found, < 0 on error and 1 if there
* was nothing in the tree that matched the search criteria.
*/
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0079b60b18f..ded1643c027 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -27,7 +27,6 @@
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <asm/kmap_types.h>
-#include "bit-radix.h"
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e4f30090d64..5040b71f190 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,14 @@
#include "hash.h"
#include "transaction.h"
+/*
+ * insert a name into a directory, doing overflow properly if there is a hash
+ * collision. data_size indicates how big the item inserted should be. On
+ * success a struct btrfs_dir_item pointer is returned, otherwise it is
+ * an ERR_PTR.
+ *
+ * The name is not copied into the dir item, you have to do that yourself.
+ */
static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
*trans,
struct btrfs_root *root,
@@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
return (struct btrfs_dir_item *)ptr;
}
+/*
+ * xattrs work a lot like directories, this inserts an xattr item
+ * into the tree
+ */
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name,
u16 name_len, const void *data, u16 data_len,
@@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * insert a directory item in the tree, doing all the magic for
+ * both indexes. 'dir' indicates which objectid to insert it into,
+ * 'location' is the key to stuff into the directory item, 'type' is the
+ * type of the inode we're pointing to, and 'index' is the sequence number
+ * to use for the second index (if one is created).
+ */
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, const char *name, int name_len, u64 dir,
struct btrfs_key *location, u8 type, u64 index)
@@ -184,6 +203,11 @@ out:
return 0;
}
+/*
+ * lookup a directory item based on name. 'dir' is the objectid
+ * we're searching in, and 'mod' tells us if you plan on deleting the
+ * item (use mod < 0) or changing the options (use mod > 0)
+ */
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
@@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
return btrfs_match_dir_item_name(root, path, name, name_len);
}
+/*
+ * lookup a directory item based on index. 'dir' is the objectid
+ * we're searching in, and 'mod' tells us if you plan on deleting the
+ * item (use mod < 0) or changing the options (use mod > 0)
+ *
+ * The name is used to make sure the index really points to the name you were
+ * looking for.
+ */
struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
return btrfs_match_dir_item_name(root, path, name, name_len);
}
+/*
+ * helper function to look at the directory item pointed to by 'path'
+ * this walks through all the entries in a dir item and finds one
+ * for a specific name.
+ */
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len)
@@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
return NULL;
}
+/*
+ * given a pointer into a directory item, delete it. This
+ * handles items that have more than one entry in them.
+ */
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 45b4f728527..5ee10d3136f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+/*
+ * end_io_wq structs are used to do processing in task context when an IO is
+ * complete. This is used during reads to verify checksums, and it is used
+ * by writes to insert metadata for new file extents after IO is complete.
+ */
struct end_io_wq {
struct bio *bio;
bio_end_io_t *end_io;
@@ -66,6 +71,11 @@ struct end_io_wq {
struct btrfs_work work;
};
+/*
+ * async submit bios are used to offload expensive checksumming
+ * onto the worker threads. They checksum file and metadata bios
+ * just before they are sent down the IO stack.
+ */
struct async_submit_bio {
struct inode *inode;
struct bio *bio;
@@ -76,6 +86,10 @@ struct async_submit_bio {
struct btrfs_work work;
};
+/*
+ * extents on the btree inode are pretty simple, there's one extent
+ * that covers the entire device
+ */
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
size_t page_offset, u64 start, u64 len,
int create)
@@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result)
*(__le32 *)result = ~cpu_to_le32(crc);
}
+/*
+ * compute the csum for a btree block, and either verify it or write it
+ * into the csum field of the block.
+ */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
int verify)
{
@@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
return 0;
}
+/*
+ * we can't consider a given block up to date unless the transid of the
+ * block matches the transid in the parent node's pointer. This is how we
+ * detect blocks that either didn't get written at all or got written
+ * in the wrong place.
+ */
static int verify_parent_transid(struct extent_io_tree *io_tree,
struct extent_buffer *eb, u64 parent_transid)
{
@@ -228,9 +252,12 @@ out:
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
GFP_NOFS);
return ret;
-
}
+/*
+ * helper to read a given tree block,