From be0e5c097fc206b863ce9fe6b3cfd6974b0110f4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 26 Jan 2007 15:51:26 -0500 Subject: Btrfs: Initial checkin, basic working tree code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 810 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 810 insertions(+) create mode 100644 fs/btrfs/ctree.c diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c new file mode 100644 index 00000000000..893fd56960a --- /dev/null +++ b/fs/btrfs/ctree.c @@ -0,0 +1,810 @@ +#include +#include +#include "kerncompat.h" + +#define BLOCKSIZE 4096 + +struct key { + u64 objectid; + u32 flags; + u64 offset; +} __attribute__ ((__packed__)); + +struct header { + u64 fsid[2]; /* FS specific uuid */ + u64 blocknum; + u64 parentid; + u32 csum; + u32 ham; + u16 nritems; + u16 flags; +} __attribute__ ((__packed__)); + +#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \ + (sizeof(struct key) + sizeof(u64))) + +#define LEVEL_BITS 3 +#define MAX_LEVEL (1 << LEVEL_BITS) +#define node_level(f) ((f) & (MAX_LEVEL-1)) +#define is_leaf(f) (node_level(f) == 0) + +struct ctree_root { + struct node *node; +}; + +struct item { + struct key key; + u16 offset; + u16 size; +} __attribute__ ((__packed__)); + +#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header)) +struct leaf { + struct header header; + union { + struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + u8 data[BLOCKSIZE-sizeof(struct header)]; + }; +} __attribute__ ((__packed__)); + +struct node { + struct header header; + struct key keys[NODEPTRS_PER_BLOCK]; + u64 blockptrs[NODEPTRS_PER_BLOCK]; +} __attribute__ ((__packed__)); + +struct ctree_path { + struct node *nodes[MAX_LEVEL]; + int slots[MAX_LEVEL]; +}; + +static inline void init_path(struct ctree_path *p) +{ + memset(p, 0, sizeof(*p)); +} + +static inline unsigned int leaf_data_end(struct leaf *leaf) +{ + unsigned int nr = leaf->header.nritems; + if (nr == 0) + return ARRAY_SIZE(leaf->data); + return leaf->items[nr-1].offset; +} + +static inline int leaf_free_space(struct leaf *leaf) +{ + int data_end = leaf_data_end(leaf); + int nritems = leaf->header.nritems; + char *items_end = (char *)(leaf->items + nritems + 1); + return (char *)(leaf->data + data_end) - (char *)items_end; +} + +int comp_keys(struct key *k1, struct key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->flags > k2->flags) + return 1; + if (k1->flags < k2->flags) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} +int generic_bin_search(char *p, int item_size, struct key *key, + int max, int *slot) +{ + int low = 0; + int high = max; + int mid; + int ret; + struct key *tmp; + + while(low < high) { + mid = (low + high) / 2; + tmp = (struct key *)(p + mid * item_size); + ret = comp_keys(tmp, key); + + if (ret < 0) + low = mid + 1; + else if (ret > 0) + high = mid; + else { + *slot = mid; + return 0; + } + } + *slot = low; + return 1; +} + +int bin_search(struct node *c, struct key *key, int *slot) +{ + if (is_leaf(c->header.flags)) { + struct leaf *l = (struct leaf *)c; + return generic_bin_search((void *)l->items, sizeof(struct item), + key, c->header.nritems, slot); + } else { + return generic_bin_search((void *)c->keys, sizeof(struct key), + key, c->header.nritems, slot); + } + return -1; +} + +void *read_block(u64 blocknum) +{ + return (void *)blocknum; +} + +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) +{ + struct node *c = root->node; + int slot; + int ret; + int level; + while (c) { + level = node_level(c->header.flags); + p->nodes[level] = c; + ret = bin_search(c, key, &slot); + if (!is_leaf(c->header.flags)) { + if (ret && slot > 0) + slot -= 1; + p->slots[level] = slot; + c = read_block(c->blockptrs[slot]); + continue; + } else { + p->slots[level] = slot; + return ret; + } + } + return -1; +} + +static void fixup_low_keys(struct ctree_path *path, struct key *key, + int level) +{ + int i; + /* adjust the pointers going up the tree */ + for (i = level; i < MAX_LEVEL; i++) { + struct node *t = path->nodes[i]; + int tslot = path->slots[i]; + if (!t) + break; + memcpy(t->keys + tslot, key, sizeof(*key)); + if (tslot != 0) + break; + } +} + +int __insert_ptr(struct ctree_root *root, + struct ctree_path *path, struct key *key, + u64 blocknr, int slot, int level) +{ + struct node *c; + struct node *lower; + struct key *lower_key; + int nritems; + /* need a new root */ + if (!path->nodes[level]) { + c = malloc(sizeof(struct node)); + memset(c, 0, sizeof(c)); + c->header.nritems = 2; + c->header.flags = node_level(level); + lower = path->nodes[level-1]; + if (is_leaf(lower->header.flags)) + lower_key = &((struct leaf *)lower)->items[0].key; + else + lower_key = lower->keys; + memcpy(c->keys, lower_key, sizeof(struct key)); + memcpy(c->keys + 1, key, sizeof(struct key)); + c->blockptrs[0] = (u64)lower; + c->blockptrs[1] = blocknr; + root->node = c; + path->nodes[level] = c; + path->slots[level] = 0; + if (c->keys[1].objectid == 0) + BUG(); + return 0; + } + lower = path->nodes[level]; + nritems = lower->header.nritems; + if (slot > nritems) + BUG(); + if (nritems == NODEPTRS_PER_BLOCK) + BUG(); + if (slot != nritems) { + memmove(lower->keys + slot + 1, lower->keys + slot, + (nritems - slot) * sizeof(struct key)); + memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, + (nritems - slot) * sizeof(u64)); + } + memcpy(lower->keys + slot, key, sizeof(struct key)); + lower->blockptrs[slot] = blocknr; + lower->header.nritems++; + if (lower->keys[1].objectid == 0) + BUG(); + return 0; +} + +int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *left; + struct node *right; + int push_items = 0; + int left_nritems; + int right_nritems; + + if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) + return 1; + slot = path->slots[level + 1]; + if (slot == 0) + return 1; + + left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); + right = path->nodes[level]; + left_nritems = left->header.nritems; + right_nritems = right->header.nritems; + push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); + if (push_items <= 0) + return 1; + + if (right_nritems < push_items) + push_items = right_nritems; + memcpy(left->keys + left_nritems, right->keys, + push_items * sizeof(struct key)); + memcpy(left->blockptrs + left_nritems, right->blockptrs, + push_items * sizeof(u64)); + memmove(right->keys, right->keys + push_items, + (right_nritems - push_items) * sizeof(struct key)); + memmove(right->blockptrs, right->blockptrs + push_items, + (right_nritems - push_items) * sizeof(u64)); + right->header.nritems -= push_items; + left->header.nritems += push_items; + + /* adjust the pointers going up the tree */ + fixup_low_keys(path, right->keys, level + 1); + + /* then fixup the leaf pointer in the path */ + if (path->slots[level] < push_items) { + path->slots[level] += left_nritems; + path->nodes[level] = (struct node*)left; + path->slots[level + 1] -= 1; + } else { + path->slots[level] -= push_items; + } + return 0; +} + +int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *dst; + struct node *src; + int push_items = 0; + int dst_nritems; + int src_nritems; + + if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) + return 1; + slot = path->slots[level + 1]; + if (slot == NODEPTRS_PER_BLOCK - 1) + return 1; + + if (slot >= path->nodes[level + 1]->header.nritems -1) + return 1; + + dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); + src = path->nodes[level]; + dst_nritems = dst->header.nritems; + src_nritems = src->header.nritems; + push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); + if (push_items <= 0) + return 1; + + if (src_nritems < push_items) + push_items = src_nritems; + memmove(dst->keys + push_items, dst->keys, + dst_nritems * sizeof(struct key)); + memcpy(dst->keys, src->keys + src_nritems - push_items, + push_items * sizeof(struct key)); + + memmove(dst->blockptrs + push_items, dst->blockptrs, + dst_nritems * sizeof(u64)); + memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, + push_items * sizeof(u64)); + + src->header.nritems -= push_items; + dst->header.nritems += push_items; + + /* adjust the pointers going up the tree */ + memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, + dst->keys, sizeof(struct key)); + /* then fixup the leaf pointer in the path */ + if (path->slots[level] >= src->header.nritems) { + path->slots[level] -= src->header.nritems; + path->nodes[level] = (struct node*)dst; + path->slots[level + 1] += 1; + } + return 0; +} + +int insert_ptr(struct ctree_root *root, + struct ctree_path *path, struct key *key, + u64 blocknr, int level) +{ + struct node *c = path->nodes[level]; + struct node *b; + struct node *bal[MAX_LEVEL]; + int bal_level = level; + int mid; + int bal_start = -1; + + memset(bal, 0, ARRAY_SIZE(bal)); + while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { + if (push_node_left(root, path, + node_level(c->header.flags)) == 0) + break; + if (push_node_right(root, path, + node_level(c->header.flags)) == 0) + break; + bal_start = bal_level; + if (bal_level == MAX_LEVEL - 1) + BUG(); + b = malloc(sizeof(struct node)); + b->header.flags = c->header.flags; + mid = (c->header.nritems + 1) / 2; + memcpy(b->keys, c->keys + mid, + (c->header.nritems - mid) * sizeof(struct key)); + memcpy(b->blockptrs, c->blockptrs + mid, + (c->header.nritems - mid) * sizeof(u64)); + b->header.nritems = c->header.nritems - mid; + c->header.nritems = mid; + bal[bal_level] = b; + if (bal_level == MAX_LEVEL - 1) + break; + bal_level += 1; + c = path->nodes[bal_level]; + } + while(bal_start > 0) { + b = bal[bal_start]; + c = path->nodes[bal_start]; + __insert_ptr(root, path, b->keys, (u64)b, + path->slots[bal_start + 1] + 1, bal_start + 1); + if (path->slots[bal_start] >= c->header.nritems) { + path->slots[bal_start] -= c->header.nritems; + path->nodes[bal_start] = b; + path->slots[bal_start + 1] += 1; + } + bal_start--; + if (!bal[bal_start]) + break; + } + return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, + level); +} + +int leaf_space_used(struct leaf *l, int start, int nr) +{ + int data_len; + int end = start + nr - 1; + + if (!nr) + return 0; + data_len = l->items[start].offset + l->items[start].size; + data_len = data_len - l->items[end].offset; + data_len += sizeof(struct item) * nr; + return data_len; +} + +int push_leaf_left(struct ctree_root *root, struct ctree_path *path, + int data_size) +{ + struct leaf *right = (struct leaf *)path->nodes[0]; + struct leaf *left; + int slot; + int i; + int free_space; + int push_space = 0; + int push_items = 0; + struct item *item; + int old_left_nritems; + + slot = path->slots[1]; + if (slot == 0) { + return 1; + } + if (!path->nodes[1]) { + return 1; + } + left = read_block(path->nodes[1]->blockptrs[slot - 1]); + free_space = leaf_free_space(left); + if (free_space < data_size + sizeof(struct item)) { + return 1; + } + for (i = 0; i < right->header.nritems; i++) { + item = right->items + i; + if (path->slots[0] == i) + push_space += data_size + sizeof(*item); + if (item->size + sizeof(*item) + push_space > free_space) + break; + push_items++; + push_space += item->size + sizeof(*item); + } + if (push_items == 0) { + return 1; + } + /* push data from right to left */ + memcpy(left->items + left->header.nritems, + right->items, push_items * sizeof(struct item)); + push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; + memcpy(left->data + leaf_data_end(left) - push_space, + right->data + right->items[push_items - 1].offset, + push_space); + old_left_nritems = left->header.nritems; + for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { + left->items[i].offset -= LEAF_DATA_SIZE - + left->items[old_left_nritems -1].offset; + } + left->header.nritems += push_items; + + /* fixup right node */ + push_space = right->items[push_items-1].offset - leaf_data_end(right); + memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + + leaf_data_end(right), push_space); + memmove(right->items, right->items + push_items, + (right->header.nritems - push_items) * sizeof(struct item)); + right->header.nritems -= push_items; + push_space = LEAF_DATA_SIZE; + for (i = 0; i < right->header.nritems; i++) { + right->items[i].offset = push_space - right->items[i].size; + push_space = right->items[i].offset; + } + fixup_low_keys(path, &right->items[0].key, 1); + + /* then fixup the leaf pointer in the path */ + if (path->slots[0] < push_items) { + path->slots[0] += old_left_nritems; + path->nodes[0] = (struct node*)left; + path->slots[1] -= 1; + } else { + path->slots[0] -= push_items; + } + return 0; +} + +int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) +{ + struct leaf *l = (struct leaf *)path->nodes[0]; + int nritems = l->header.nritems; + int mid = (nritems + 1)/ 2; + int slot = path->slots[0]; + struct leaf *right; + int space_needed = data_size + sizeof(struct item); + int data_copy_size; + int rt_data_off; + int i; + int ret; + + if (push_leaf_left(root, path, data_size) == 0) { + return 0; + } + right = malloc(sizeof(struct leaf)); + memset(right, 0, sizeof(*right)); + if (mid <= slot) { + if (leaf_space_used(l, mid, nritems - mid) + space_needed > + LEAF_DATA_SIZE) + BUG(); + } else { + if (leaf_space_used(l, 0, mid + 1) + space_needed > + LEAF_DATA_SIZE) + BUG(); + } + right->header.nritems = nritems - mid; + data_copy_size = l->items[mid].offset + l->items[mid].size - + leaf_data_end(l); + memcpy(right->items, l->items + mid, + (nritems - mid) * sizeof(struct item)); + memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, + l->data + leaf_data_end(l), data_copy_size); + rt_data_off = LEAF_DATA_SIZE - + (l->items[mid].offset + l->items[mid].size); + for (i = 0; i < right->header.nritems; i++) { + right->items[i].offset += rt_data_off; + } + l->header.nritems = mid; + ret = insert_ptr(root, path, &right->items[0].key, + (u64)right, 1); + if (mid <= slot) { + path->nodes[0] = (struct node *)right; + path->slots[0] -= mid; + path->slots[1] += 1; + } + return ret; +} + +int insert_item(struct ctree_root *root, struct key *key, + void *data, int data_size) +{ + int ret; + int slot; + struct leaf *leaf; + unsigned int nritems; + unsigned int data_end; + struct ctree_path path; + + init_path(&path); + ret = search_slot(root, key, &path); + if (ret == 0) + return -EEXIST; + + leaf = (struct leaf *)path.nodes[0]; + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + split_leaf(root, &path, data_size); + leaf = (struct leaf *)path.nodes[0]; + nritems = leaf->header.nritems; + data_end = leaf_data_end(leaf); + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + BUG(); + + slot = path.slots[0]; + if (slot == 0) + fixup_low_keys(&path, key, 1); + if (slot != nritems) { + int i; + unsigned int old_data = leaf->items[slot].offset + + leaf->items[slot].size; + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) + leaf->items[i].offset -= data_size; + + /* shift the items */ + memmove(leaf->items + slot + 1, leaf->items + slot, + (nritems - slot) * sizeof(struct item)); + + /* shift the data */ + memmove(leaf->data + data_end - data_size, leaf->data + + data_end, old_data - data_end); + data_end = old_data; + } + memcpy(&leaf->items[slot].key, key, sizeof(struct key)); + leaf->items[slot].offset = data_end - data_size; + leaf->items[slot].size = data_size; + memcpy(leaf->data + data_end - data_size, data, data_size); + leaf->header.nritems += 1; + if (leaf_free_space(leaf) < 0) + BUG(); + return 0; +} + +int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *node; + int nritems; + + while(1) { + node = path->nodes[level]; + if (!node) + break; + slot = path->slots[level]; + nritems = node->header.nritems; + + if (slot != nritems -1) { + memmove(node->keys + slot, node->keys + slot + 1, + sizeof(struct key) * (nritems - slot - 1)); + memmove(node->blockptrs + slot, + node->blockptrs + slot + 1, + sizeof(u64) * (nritems - slot - 1)); + } + node->header.nritems--; + if (node->header.nritems != 0) { + int tslot; + if (slot == 0) + fixup_low_keys(path, node->keys, level + 1); + tslot = path->slots[level+1]; + push_node_left(root, path, level); + if (node->header.nritems) { + push_node_right(root, path, level); + } + path->slots[level+1] = tslot; + if (node->header.nritems) + break; + } + if (node == root->node) { + printf("root is now null!\n"); + root->node = NULL; + break; + } + level++; + if (!path->nodes[level]) + BUG(); + free(node); + } + return 0; +} + +int del_item(struct ctree_root *root, struct key *key) +{ + int ret; + int slot; + struct leaf *leaf; + struct ctree_path path; + int doff; + int dsize; + + init_path(&path); + ret = search_slot(root, key, &path); + if (ret != 0) + return -1; + + leaf = (struct leaf *)path.nodes[0]; + slot = path.slots[0]; + doff = leaf->items[slot].offset; + dsize = leaf->items[slot].size; + + if (slot != leaf->header.nritems - 1) { + int i; + int data_end = leaf_data_end(leaf); + memmove(leaf->data + data_end + dsize, + leaf->data + data_end, + doff - data_end); + for (i = slot + 1; i < leaf->header.nritems; i++) + leaf->items[i].offset += dsize; + memmove(leaf->items + slot, leaf->items + slot + 1, + sizeof(struct item) * + (leaf->header.nritems - slot - 1)); + } + leaf->header.nritems -= 1; + if (leaf->header.nritems == 0) { + free(leaf); + del_ptr(root, &path, 1); + } else { + if (slot == 0) + fixup_low_keys(&path, &leaf->items[0].key, 1); + if (leaf_space_used(leaf, 0, leaf->header.nritems) < + LEAF_DATA_SIZE / 4) { + /* push_leaf_left fixes the path. + * make sure the path still points to our leaf + * for possible call to del_ptr below + */ + slot = path.slots[1]; + push_leaf_left(root, &path, 1); + path.slots[1] = slot; + if (leaf->header.nritems == 0) { + free(leaf); + del_ptr(root, &path, 1); + } + } + } + return 0; +} + +void print_leaf(struct leaf *l) +{ + int i; + int nr = l->header.nritems; + struct item *item; + printf("leaf %p total ptrs %d free space %d\n", l, nr, + leaf_free_space(l)); + fflush(stdout); + for (i = 0 ; i < nr ; i++) { + item = l->items + i; + printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", + i, + item->key.objectid, item->key.flags, item->key.offset, + item->offset, item->size); + fflush(stdout); + printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + fflush(stdout); + } +} +void print_tree(struct node *c) +{ + int i; + int nr; + + if (!c) + return; + nr = c->header.nritems; + if (is_leaf(c->header.flags)) { + print_leaf((struct leaf *)c); + return; + } + printf("node %p level %d total ptrs %d free spc %lu\n", c, + node_level(c->header.flags), c->header.nritems, + NODEPTRS_PER_BLOCK - c->header.nritems); + fflush(stdout); + for (i = 0; i < nr; i++) { + printf("\tkey %d (%lu %u %lu) block %lx\n", + i, + c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, + c->blockptrs[i]); + fflush(stdout); + } + for (i = 0; i < nr; i++) { + struct node *next = read_block(c->blockptrs[i]); + if (is_leaf(next->header.flags) && + node_level(c->header.flags) != 1) + BUG(); + if (node_level(next->header.flags) != + node_level(c->header.flags) - 1) + BUG(); + print_tree(next); + } + +} + +/* for testing only */ +int next_key(int i, int max_key) { + return rand() % max_key; + // return i; +} + +int main() { + struct leaf *first_node = malloc(sizeof(struct leaf)); + struct ctree_root root; + struct key ins; + char *buf; + int i; + int num; + int ret; + int run_size = 10000000; + int max_key = 100000000; + int tree_size = 0; + struct ctree_path path; + + + srand(55); + root.node = (struct node *)first_node; + memset(first_node, 0, sizeof(*first_node)); + for (i = 0; i < run_size; i++) { + buf = malloc(64); + num = next_key(i, max_key); + // num = i; + sprintf(buf, "string-%d", num); + // printf("insert %d\n", num); + ins.objectid = num; + ins.offset = 0; + ins.flags = 0; + ret = insert_item(&root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + } + srand(55); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + ins.offset = 0; + ins.flags = 0; + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret) { + print_tree(root.node); + printf("unable to find %d\n", num); + exit(1); + } + } + printf("node %p level %d total ptrs %d free spc %lu\n", root.node, + node_level(root.node->header.flags), root.node->header.nritems, + NODEPTRS_PER_BLOCK - root.node->header.nritems); + // print_tree(root.node); + printf("all searches good\n"); + i = 0; + srand(55); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + del_item(&root, &ins); + } + print_tree(root.node); + return 0; +} -- cgit v1.2.3-18-g5258 From 4920c9ac9a4bbc6bf9acd8c614987ee6b378e78f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 26 Jan 2007 16:38:42 -0500 Subject: Btrfs: Faster deletes, add Makefile and kerncompat Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 +++++ fs/btrfs/ctree.c | 82 ++++++++++++++++++++++++++++++++++++++------------- fs/btrfs/kerncompat.h | 68 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 21 deletions(-) create mode 100644 fs/btrfs/Makefile create mode 100644 fs/btrfs/kerncompat.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile new file mode 100644 index 00000000000..9f84c08baab --- /dev/null +++ b/fs/btrfs/Makefile @@ -0,0 +1,7 @@ + +ctree: ctree.o + gcc -g -O2 -Wall -o ctree ctree.c + +clean: + rm ctree ctree.o + diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 893fd56960a..4bf5e92584b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -615,9 +615,9 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) if (node->header.nritems) { push_node_right(root, path, level); } - path->slots[level+1] = tslot; if (node->header.nritems) break; + path->slots[level+1] = tslot; } if (node == root->node) { printf("root is now null!\n"); @@ -632,22 +632,15 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) return 0; } -int del_item(struct ctree_root *root, struct key *key) +int del_item(struct ctree_root *root, struct ctree_path *path) { - int ret; int slot; struct leaf *leaf; - struct ctree_path path; int doff; int dsize; - init_path(&path); - ret = search_slot(root, key, &path); - if (ret != 0) - return -1; - - leaf = (struct leaf *)path.nodes[0]; - slot = path.slots[0]; + leaf = (struct leaf *)path->nodes[0]; + slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; @@ -665,23 +658,26 @@ int del_item(struct ctree_root *root, struct key *key) } leaf->header.nritems -= 1; if (leaf->header.nritems == 0) { + if (leaf == (struct leaf *)root->node) + root->node = NULL; + else + del_ptr(root, path, 1); free(leaf); - del_ptr(root, &path, 1); } else { if (slot == 0) - fixup_low_keys(&path, &leaf->items[0].key, 1); + fixup_low_keys(path, &leaf->items[0].key, 1); if (leaf_space_used(leaf, 0, leaf->header.nritems) < LEAF_DATA_SIZE / 4) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below */ - slot = path.slots[1]; - push_leaf_left(root, &path, 1); - path.slots[1] = slot; + slot = path->slots[1]; + push_leaf_left(root, path, 1); if (leaf->header.nritems == 0) { free(leaf); - del_ptr(root, &path, 1); + path->slots[1] = slot; + del_ptr(root, path, 1); } } } @@ -753,11 +749,12 @@ int main() { struct leaf *first_node = malloc(sizeof(struct leaf)); struct ctree_root root; struct key ins; + struct key last = { (u64)-1, 0, 0}; char *buf; int i; int num; int ret; - int run_size = 10000000; + int run_size = 100000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -783,8 +780,6 @@ int main() { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - ins.offset = 0; - ins.flags = 0; init_path(&path); ret = search_slot(&root, &ins, &path); if (ret) { @@ -800,11 +795,56 @@ int main() { printf("all searches good\n"); i = 0; srand(55); + for (i = 0 ; i < run_size/4; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret) + continue; + ret = del_item(&root, &path); + if (ret != 0) + BUG(); + tree_size--; + } + srand(128); for (i = 0; i < run_size; i++) { + buf = malloc(64); num = next_key(i, max_key); + sprintf(buf, "string-%d", num); ins.objectid = num; - del_item(&root, &ins); + ret = insert_item(&root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + } + while(root.node) { + struct leaf *leaf; + int slot; + ins.objectid = (u64)-1; + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret == 0) + BUG(); + + leaf = (struct leaf *)(path.nodes[0]); + slot = path.slots[0]; + if (slot != leaf->header.nritems) + BUG(); + while(path.slots[0] > 0) { + path.slots[0] -= 1; + slot = path.slots[0]; + leaf = (struct leaf *)(path.nodes[0]); + + if (comp_keys(&last, &leaf->items[slot].key) <= 0) + BUG(); + memcpy(&last, &leaf->items[slot].key, sizeof(last)); + ret = del_item(&root, &path); + if (ret != 0) + BUG(); + tree_size--; + } } print_tree(root.node); + printf("tree size is now %d\n", tree_size); return 0; } diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h new file mode 100644 index 00000000000..3a4bb4d661f --- /dev/null +++ b/fs/btrfs/kerncompat.h @@ -0,0 +1,68 @@ +#ifndef __KERNCOMPAT +#define __KERNCOMPAT +#define gfp_t int +#define get_cpu_var(p) (p) +#define __get_cpu_var(p) (p) +#define BITS_PER_LONG 64 +#define __GFP_BITS_SHIFT 20 +#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) +#define __read_mostly +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define __force +#define PAGE_SHIFT 12 +#define ULONG_MAX (~0UL) +#define BUG() abort() + +typedef unsigned int u32; +typedef unsigned long u64; +typedef unsigned char u8; +typedef unsigned short u16; + +typedef unsigned long pgoff_t; + +#include +#include +#include + +struct vma_shared { int prio_tree_node; }; +struct vm_area_struct { + unsigned long vm_pgoff; + unsigned long vm_start; + unsigned long vm_end; + struct vma_shared shared; +}; + +struct page { + unsigned long index; +}; + +static inline void preempt_enable(void) { do {; } while(0);} +static inline void preempt_disable(void) { do {; } while(0);} + +static inline void __set_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + *p |= 1UL << bit; +} + +static inline int test_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + return *p & (1UL << bit) ? 1 : 0; +} + +static inline void __clear_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + *p &= ~(1UL << bit); +} +#define BUG_ON(c) do { if (c) abort(); } while (0) + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) + +#endif + +#define ENOMEM 5 +#define EEXIST 6 -- cgit v1.2.3-18-g5258 From eb60ceac0ebd11e9896ed1ca84fcad71a4b23601 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Feb 2007 09:18:22 -0500 Subject: Btrfs: Add backing store, memory management Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 11 +- fs/btrfs/ctree.c | 432 ++++++++++++++++---------- fs/btrfs/ctree.h | 62 ++++ fs/btrfs/disk-io.c | 174 +++++++++++ fs/btrfs/disk-io.h | 21 ++ fs/btrfs/kerncompat.h | 1 + fs/btrfs/radix-tree.c | 836 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/radix-tree.h | 73 +++++ 8 files changed, 1447 insertions(+), 163 deletions(-) create mode 100644 fs/btrfs/ctree.h create mode 100644 fs/btrfs/disk-io.c create mode 100644 fs/btrfs/disk-io.h create mode 100644 fs/btrfs/radix-tree.c create mode 100644 fs/btrfs/radix-tree.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9f84c08baab..63360212a07 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,7 +1,12 @@ -ctree: ctree.o - gcc -g -O2 -Wall -o ctree ctree.c +CFLAGS= -g -Wall + +.c.o: + $(CC) $(CFLAGS) -c $< + +ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h + gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o clean: - rm ctree ctree.o + rm ctree *.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4bf5e92584b..6f0522f2108 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,68 +1,25 @@ #include #include #include "kerncompat.h" - -#define BLOCKSIZE 4096 - -struct key { - u64 objectid; - u32 flags; - u64 offset; -} __attribute__ ((__packed__)); - -struct header { - u64 fsid[2]; /* FS specific uuid */ - u64 blocknum; - u64 parentid; - u32 csum; - u32 ham; - u16 nritems; - u16 flags; -} __attribute__ ((__packed__)); - -#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \ - (sizeof(struct key) + sizeof(u64))) - -#define LEVEL_BITS 3 -#define MAX_LEVEL (1 << LEVEL_BITS) -#define node_level(f) ((f) & (MAX_LEVEL-1)) -#define is_leaf(f) (node_level(f) == 0) - -struct ctree_root { - struct node *node; -}; - -struct item { - struct key key; - u16 offset; - u16 size; -} __attribute__ ((__packed__)); - -#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header)) -struct leaf { - struct header header; - union { - struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; - u8 data[BLOCKSIZE-sizeof(struct header)]; - }; -} __attribute__ ((__packed__)); - -struct node { - struct header header; - struct key keys[NODEPTRS_PER_BLOCK]; - u64 blockptrs[NODEPTRS_PER_BLOCK]; -} __attribute__ ((__packed__)); - -struct ctree_path { - struct node *nodes[MAX_LEVEL]; - int slots[MAX_LEVEL]; -}; +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" static inline void init_path(struct ctree_path *p) { memset(p, 0, sizeof(*p)); } +static void release_path(struct ctree_root *root, struct ctree_path *p) +{ + int i; + for (i = 0; i < MAX_LEVEL; i++) { + if (!p->nodes[i]) + break; + tree_block_release(root, p->nodes[i]); + } +} + static inline unsigned int leaf_data_end(struct leaf *leaf) { unsigned int nr = leaf->header.nritems; @@ -135,26 +92,25 @@ int bin_search(struct node *c, struct key *key, int *slot) return -1; } -void *read_block(u64 blocknum) -{ - return (void *)blocknum; -} - int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) { - struct node *c = root->node; + struct tree_buffer *b = root->node; + struct node *c; + int slot; int ret; int level; - while (c) { + b->count++; + while (b) { + c = &b->node; level = node_level(c->header.flags); - p->nodes[level] = c; + p->nodes[level] = b; ret = bin_search(c, key, &slot); if (!is_leaf(c->header.flags)) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - c = read_block(c->blockptrs[slot]); + b = read_tree_block(root, c->blockptrs[slot]); continue; } else { p->slots[level] = slot; @@ -164,17 +120,20 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) return -1; } -static void fixup_low_keys(struct ctree_path *path, struct key *key, - int level) +static void fixup_low_keys(struct ctree_root *root, + struct ctree_path *path, struct key *key, + int level) { int i; /* adjust the pointers going up the tree */ for (i = level; i < MAX_LEVEL; i++) { - struct node *t = path->nodes[i]; + struct node *t; int tslot = path->slots[i]; - if (!t) + if (!path->nodes[i]) break; + t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); + write_tree_block(root, path->nodes[i]); if (tslot != 0) break; } @@ -190,27 +149,34 @@ int __insert_ptr(struct ctree_root *root, int nritems; /* need a new root */ if (!path->nodes[level]) { - c = malloc(sizeof(struct node)); + struct tree_buffer *t; + t = alloc_free_block(root); + c = &t->node; memset(c, 0, sizeof(c)); c->header.nritems = 2; c->header.flags = node_level(level); - lower = path->nodes[level-1]; + c->header.blocknr = t->blocknr; + lower = &path->nodes[level-1]->node; if (is_leaf(lower->header.flags)) lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct key)); memcpy(c->keys + 1, key, sizeof(struct key)); - c->blockptrs[0] = (u64)lower; + c->blockptrs[0] = path->nodes[level-1]->blocknr; c->blockptrs[1] = blocknr; - root->node = c; - path->nodes[level] = c; + /* the path has an extra ref to root->node */ + tree_block_release(root, root->node); + root->node = t; + t->count++; + write_tree_block(root, t); + path->nodes[level] = t; path->slots[level] = 0; if (c->keys[1].objectid == 0) BUG(); return 0; } - lower = path->nodes[level]; + lower = &path->nodes[level]->node; nritems = lower->header.nritems; if (slot > nritems) BUG(); @@ -227,6 +193,7 @@ int __insert_ptr(struct ctree_root *root, lower->header.nritems++; if (lower->keys[1].objectid == 0) BUG(); + write_tree_block(root, path->nodes[level]); return 0; } @@ -238,6 +205,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int push_items = 0; int left_nritems; int right_nritems; + struct tree_buffer *t; + struct tree_buffer *right_buf; if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) return 1; @@ -245,13 +214,18 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) if (slot == 0) return 1; - left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); - right = path->nodes[level]; + t = read_tree_block(root, + path->nodes[level + 1]->node.blockptrs[slot - 1]); + left = &t->node; + right_buf = path->nodes[level]; + right = &right_buf->node; left_nritems = left->header.nritems; right_nritems = right->header.nritems; push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); - if (push_items <= 0) + if (push_items <= 0) { + tree_block_release(root, t); return 1; + } if (right_nritems < push_items) push_items = right_nritems; @@ -267,15 +241,20 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) left->header.nritems += push_items; /* adjust the pointers going up the tree */ - fixup_low_keys(path, right->keys, level + 1); + fixup_low_keys(root, path, right->keys, level + 1); + + write_tree_block(root, t); + write_tree_block(root, right_buf); /* then fixup the leaf pointer in the path */ if (path->slots[level] < push_items) { path->slots[level] += left_nritems; - path->nodes[level] = (struct node*)left; + tree_block_release(root, path->nodes[level]); + path->nodes[level] = t; path->slots[level + 1] -= 1; } else { path->slots[level] -= push_items; + tree_block_release(root, t); } return 0; } @@ -283,6 +262,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) { int slot; + struct tree_buffer *t; + struct tree_buffer *src_buffer; struct node *dst; struct node *src; int push_items = 0; @@ -295,16 +276,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) if (slot == NODEPTRS_PER_BLOCK - 1) return 1; - if (slot >= path->nodes[level + 1]->header.nritems -1) + if (slot >= path->nodes[level + 1]->node.header.nritems -1) return 1; - dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); - src = path->nodes[level]; + t = read_tree_block(root, + path->nodes[level + 1]->node.blockptrs[slot + 1]); + dst = &t->node; + src_buffer = path->nodes[level]; + src = &src_buffer->node; dst_nritems = dst->header.nritems; src_nritems = src->header.nritems; push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); - if (push_items <= 0) + if (push_items <= 0) { + tree_block_release(root, t); return 1; + } if (src_nritems < push_items) push_items = src_nritems; @@ -322,13 +308,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) dst->header.nritems += push_items; /* adjust the pointers going up the tree */ - memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, + memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1, dst->keys, sizeof(struct key)); + + write_tree_block(root, path->nodes[level + 1]); + write_tree_block(root, t); + write_tree_block(root, src_buffer); + /* then fixup the leaf pointer in the path */ if (path->slots[level] >= src->header.nritems) { path->slots[level] -= src->header.nritems; - path->nodes[level] = (struct node*)dst; + tree_block_release(root, path->nodes[level]); + path->nodes[level] = t; path->slots[level + 1] += 1; + } else { + tree_block_release(root, t); } return 0; } @@ -337,15 +331,18 @@ int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int level) { - struct node *c = path->nodes[level]; + struct tree_buffer *t = path->nodes[level]; + struct node *c = &path->nodes[level]->node; struct node *b; - struct node *bal[MAX_LEVEL]; + struct tree_buffer *b_buffer; + struct tree_buffer *bal[MAX_LEVEL]; int bal_level = level; int mid; int bal_start = -1; memset(bal, 0, ARRAY_SIZE(bal)); - while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { + while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { + c = &t->node; if (push_node_left(root, path, node_level(c->header.flags)) == 0) break; @@ -355,8 +352,10 @@ int insert_ptr(struct ctree_root *root, bal_start = bal_level; if (bal_level == MAX_LEVEL - 1) BUG(); - b = malloc(sizeof(struct node)); + b_buffer = alloc_free_block(root); + b = &b_buffer->node; b->header.flags = c->header.flags; + b->header.blocknr = b_buffer->blocknr; mid = (c->header.nritems + 1) / 2; memcpy(b->keys, c->keys + mid, (c->header.nritems - mid) * sizeof(struct key)); @@ -364,21 +363,28 @@ int insert_ptr(struct ctree_root *root, (c->header.nritems - mid) * sizeof(u64)); b->header.nritems = c->header.nritems - mid; c->header.nritems = mid; - bal[bal_level] = b; + + write_tree_block(root, t); + write_tree_block(root, b_buffer); + + bal[bal_level] = b_buffer; if (bal_level == MAX_LEVEL - 1) break; bal_level += 1; - c = path->nodes[bal_level]; + t = path->nodes[bal_level]; } while(bal_start > 0) { - b = bal[bal_start]; - c = path->nodes[bal_start]; - __insert_ptr(root, path, b->keys, (u64)b, + b_buffer = bal[bal_start]; + c = &path->nodes[bal_start]->node; + __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr, path->slots[bal_start + 1] + 1, bal_start + 1); if (path->slots[bal_start] >= c->header.nritems) { path->slots[bal_start] -= c->header.nritems; - path->nodes[bal_start] = b; + tree_block_release(root, path->nodes[bal_start]); + path->nodes[bal_start] = b_buffer; path->slots[bal_start + 1] += 1; + } else { + tree_block_release(root, b_buffer); } bal_start--; if (!bal[bal_start]) @@ -404,7 +410,9 @@ int leaf_space_used(struct leaf *l, int start, int nr) int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int data_size) { - struct leaf *right = (struct leaf *)path->nodes[0]; + struct tree_buffer *right_buf = path->nodes[0]; + struct leaf *right = &right_buf->leaf; + struct tree_buffer *t; struct leaf *left; int slot; int i; @@ -421,9 +429,11 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, if (!path->nodes[1]) { return 1; } - left = read_block(path->nodes[1]->blockptrs[slot - 1]); + t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); + left = &t->leaf; free_space = leaf_free_space(left); if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, t); return 1; } for (i = 0; i < right->header.nritems; i++) { @@ -436,6 +446,7 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space += item->size + sizeof(*item); } if (push_items == 0) { + tree_block_release(root, t); return 1; } /* push data from right to left */ @@ -446,6 +457,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, right->data + right->items[push_items - 1].offset, push_space); old_left_nritems = left->header.nritems; + BUG_ON(old_left_nritems < 0); + for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { left->items[i].offset -= LEAF_DATA_SIZE - left->items[old_left_nritems -1].offset; @@ -460,30 +473,40 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, (right->header.nritems - push_items) * sizeof(struct item)); right->header.nritems -= push_items; push_space = LEAF_DATA_SIZE; + for (i = 0; i < right->header.nritems; i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } - fixup_low_keys(path, &right->items[0].key, 1); + + write_tree_block(root, t); + write_tree_block(root, right_buf); + + fixup_low_keys(root, path, &right->items[0].key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - path->nodes[0] = (struct node*)left; + tree_block_release(root, path->nodes[0]); + path->nodes[0] = t; path->slots[1] -= 1; } else { + tree_block_release(root, t); path->slots[0] -= push_items; } + BUG_ON(path->slots[0] < 0); return 0; } int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) { - struct leaf *l = (struct leaf *)path->nodes[0]; - int nritems = l->header.nritems; - int mid = (nritems + 1)/ 2; - int slot = path->slots[0]; + struct tree_buffer *l_buf = path->nodes[0]; + struct leaf *l = &l_buf->leaf; + int nritems; + int mid; + int slot; struct leaf *right; + struct tree_buffer *right_buffer; int space_needed = data_size + sizeof(struct item); int data_copy_size; int rt_data_off; @@ -491,9 +514,19 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int ret; if (push_leaf_left(root, path, data_size) == 0) { - return 0; + l_buf = path->nodes[0]; + l = &l_buf->leaf; + if (leaf_free_space(l) >= sizeof(struct item) + data_size) + return 0; } - right = malloc(sizeof(struct leaf)); + slot = path->slots[0]; + nritems = l->header.nritems; + mid = (nritems + 1)/ 2; + + right_buffer = alloc_free_block(root); + BUG_ON(!right_buffer); + BUG_ON(mid == nritems); + right = &right_buffer->leaf; memset(right, 0, sizeof(*right)); if (mid <= slot) { if (leaf_space_used(l, mid, nritems - mid) + space_needed > @@ -505,6 +538,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) BUG(); } right->header.nritems = nritems - mid; + right->header.blocknr = right_buffer->blocknr; + right->header.flags = node_level(0); data_copy_size = l->items[mid].offset + l->items[mid].size - leaf_data_end(l); memcpy(right->items, l->items + mid, @@ -518,12 +553,20 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) } l->header.nritems = mid; ret = insert_ptr(root, path, &right->items[0].key, - (u64)right, 1); + right_buffer->blocknr, 1); + + write_tree_block(root, right_buffer); + write_tree_block(root, l_buf); + + BUG_ON(path->slots[0] != slot); if (mid <= slot) { - path->nodes[0] = (struct node *)right; + tree_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; path->slots[0] -= mid; path->slots[1] += 1; - } + } else + tree_block_release(root, right_buffer); + BUG_ON(path->slots[0] < 0); return ret; } @@ -532,28 +575,48 @@ int insert_item(struct ctree_root *root, struct key *key, { int ret; int slot; + int slot_orig; struct leaf *leaf; + struct tree_buffer *leaf_buf; unsigned int nritems; unsigned int data_end; struct ctree_path path; + if (!root->node) { + struct tree_buffer *t; + t = alloc_free_block(root); + BUG_ON(!t); + t->node.header.nritems = 0; + t->node.header.flags = node_level(0); + t->node.header.blocknr = t->blocknr; + root->node = t; + write_tree_block(root, t); + } init_path(&path); ret = search_slot(root, key, &path); - if (ret == 0) + if (ret == 0) { + release_path(root, &path); return -EEXIST; + } - leaf = (struct leaf *)path.nodes[0]; - if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + slot_orig = path.slots[0]; + leaf_buf = path.nodes[0]; + leaf = &leaf_buf->leaf; + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) { split_leaf(root, &path, data_size); - leaf = (struct leaf *)path.nodes[0]; + leaf_buf = path.nodes[0]; + leaf = &path.nodes[0]->leaf; + } nritems = leaf->header.nritems; data_end = leaf_data_end(leaf); + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) BUG(); slot = path.slots[0]; + BUG_ON(slot < 0); if (slot == 0) - fixup_low_keys(&path, key, 1); + fixup_low_keys(root, &path, key, 1); if (slot != nritems) { int i; unsigned int old_data = leaf->items[slot].offset + @@ -580,21 +643,25 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); leaf->header.nritems += 1; + write_tree_block(root, leaf_buf); if (leaf_free_space(leaf) < 0) BUG(); + release_path(root, &path); return 0; } int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) { int slot; + struct tree_buffer *t; struct node *node; int nritems; while(1) { - node = path->nodes[level]; - if (!node) + t = path->nodes[level]; + if (!t) break; + node = &t->node; slot = path->slots[level]; nritems = node->header.nritems; @@ -606,28 +673,34 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) sizeof(u64) * (nritems - slot - 1)); } node->header.nritems--; + write_tree_block(root, t); if (node->header.nritems != 0) { int tslot; if (slot == 0) - fixup_low_keys(path, node->keys, level + 1); + fixup_low_keys(root, path, node->keys, + level + 1); tslot = path->slots[level+1]; + t->count++; push_node_left(root, path, level); if (node->header.nritems) { push_node_right(root, path, level); } - if (node->header.nritems) + if (node->header.nritems) { + tree_block_release(root, t); break; + } + tree_block_release(root, t); path->slots[level+1] = tslot; } - if (node == root->node) { - printf("root is now null!\n"); - root->node = NULL; + if (t == root->node) { + /* just turn the root into a leaf and break */ + root->node->node.header.flags = node_level(0); + write_tree_block(root, t); break; } level++; if (!path->nodes[level]) BUG(); - free(node); } return 0; } @@ -636,10 +709,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) { int slot; struct leaf *leaf; + struct tree_buffer *leaf_buf; int doff; int dsize; - leaf = (struct leaf *)path->nodes[0]; + leaf_buf = path->nodes[0]; + leaf = &leaf_buf->leaf; slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; @@ -658,14 +733,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } leaf->header.nritems -= 1; if (leaf->header.nritems == 0) { - if (leaf == (struct leaf *)root->node) - root->node = NULL; - else + if (leaf_buf == root->node) { + leaf->header.flags = node_level(0); + write_tree_block(root, leaf_buf); + } else del_ptr(root, path, 1); - free(leaf); } else { if (slot == 0) - fixup_low_keys(path, &leaf->items[0].key, 1); + fixup_low_keys(root, path, &leaf->items[0].key, 1); + write_tree_block(root, leaf_buf); if (leaf_space_used(leaf, 0, leaf->header.nritems) < LEAF_DATA_SIZE / 4) { /* push_leaf_left fixes the path. @@ -673,12 +749,13 @@ int del_item(struct ctree_root *root, struct ctree_path *path) * for possible call to del_ptr below */ slot = path->slots[1]; + leaf_buf->count++; push_leaf_left(root, path, 1); if (leaf->header.nritems == 0) { - free(leaf); path->slots[1] = slot; del_ptr(root, path, 1); } + tree_block_release(root, leaf_buf); } } return 0; @@ -689,7 +766,7 @@ void print_leaf(struct leaf *l) int i; int nr = l->header.nritems; struct item *item; - printf("leaf %p total ptrs %d free space %d\n", l, nr, + printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { @@ -703,38 +780,45 @@ void print_leaf(struct leaf *l) fflush(stdout); } } -void print_tree(struct node *c) +void print_tree(struct ctree_root *root, struct tree_buffer *t) { int i; int nr; + struct node *c; - if (!c) + if (!t) return; + c = &t->node; nr = c->header.nritems; + if (c->header.blocknr != t->blocknr) + BUG(); if (is_leaf(c->header.flags)) { print_leaf((struct leaf *)c); return; } - printf("node %p level %d total ptrs %d free spc %lu\n", c, + printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, node_level(c->header.flags), c->header.nritems, NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%lu %u %lu) block %lx\n", + printf("\tkey %d (%lu %u %lu) block %lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, c->blockptrs[i]); fflush(stdout); } for (i = 0; i < nr; i++) { - struct node *next = read_block(c->blockptrs[i]); + struct tree_buffer *next_buf = read_tree_block(root, + c->blockptrs[i]); + struct node *next = &next_buf->node; if (is_leaf(next->header.flags) && node_level(c->header.flags) != 1) BUG(); if (node_level(next->header.flags) != node_level(c->header.flags) - 1) BUG(); - print_tree(next); + print_tree(root, next_buf); + tree_block_release(root, next_buf); } } @@ -746,23 +830,24 @@ int next_key(int i, int max_key) { } int main() { - struct leaf *first_node = malloc(sizeof(struct leaf)); - struct ctree_root root; + struct ctree_root *root; struct key ins; struct key last = { (u64)-1, 0, 0}; char *buf; int i; int num; int ret; - int run_size = 100000; + int run_size = 1000000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; + radix_tree_init(); + + + root = open_ctree("dbfile"); srand(55); - root.node = (struct node *)first_node; - memset(first_node, 0, sizeof(*first_node)); for (i = 0; i < run_size; i++) { buf = malloc(64); num = next_key(i, max_key); @@ -772,39 +857,46 @@ int main() { ins.objectid = num; ins.offset = 0; ins.flags = 0; - ret = insert_item(&root, &ins, buf, strlen(buf)); + ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; } + close_ctree(root); + root = open_ctree("dbfile"); + printf("starting search\n"); srand(55); for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret) { - print_tree(root.node); + print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } - } - printf("node %p level %d total ptrs %d free spc %lu\n", root.node, - node_level(root.node->header.flags), root.node->header.nritems, - NODEPTRS_PER_BLOCK - root.node->header.nritems); - // print_tree(root.node); - printf("all searches good\n"); + release_path(root, &path); + } + close_ctree(root); + root = open_ctree("dbfile"); + printf("node %p level %d total ptrs %d free spc %lu\n", root->node, + node_level(root->node->node.header.flags), + root->node->node.header.nritems, + NODEPTRS_PER_BLOCK - root->node->node.header.nritems); + printf("all searches good, deleting some items\n"); i = 0; srand(55); for (i = 0 ; i < run_size/4; i++) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret) continue; - ret = del_item(&root, &path); + ret = del_item(root, &path); if (ret != 0) BUG(); + release_path(root, &path); tree_size--; } srand(128); @@ -813,38 +905,58 @@ int main() { num = next_key(i, max_key); sprintf(buf, "string-%d", num); ins.objectid = num; - ret = insert_item(&root, &ins, buf, strlen(buf)); + ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; } - while(root.node) { + close_ctree(root); + root = open_ctree("dbfile"); + printf("starting search2\n"); + srand(128); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + ret = search_slot(root, &ins, &path); + if (ret) { + print_tree(root, root->node); + printf("unable to find %d\n", num); + exit(1); + } + release_path(root, &path); + } + printf("starting big long delete run\n"); + while(root->node && root->node->node.header.nritems > 0) { struct leaf *leaf; int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret == 0) BUG(); - leaf = (struct leaf *)(path.nodes[0]); + leaf = &path.nodes[0]->leaf; slot = path.slots[0]; if (slot != leaf->header.nritems) BUG(); while(path.slots[0] > 0) { path.slots[0] -= 1; slot = path.slots[0]; - leaf = (struct leaf *)(path.nodes[0]); + leaf = &path.nodes[0]->leaf; if (comp_keys(&last, &leaf->items[slot].key) <= 0) BUG(); memcpy(&last, &leaf->items[slot].key, sizeof(last)); - ret = del_item(&root, &path); - if (ret != 0) + ret = del_item(root, &path); + if (ret != 0) { + printf("del_item returned %d\n", ret); BUG(); + } tree_size--; } + release_path(root, &path); } - print_tree(root.node); + close_ctree(root); printf("tree size is now %d\n", tree_size); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h new file mode 100644 index 00000000000..586bf186604 --- /dev/null +++ b/fs/btrfs/ctree.h @@ -0,0 +1,62 @@ +#ifndef __CTREE__ +#define __CTREE__ + +#define CTREE_BLOCKSIZE 4096 + +struct key { + u64 objectid; + u32 flags; + u64 offset; +} __attribute__ ((__packed__)); + +struct header { + u64 fsid[2]; /* FS specific uuid */ + u64 blocknr; + u64 parentid; + u32 csum; + u32 ham; + u16 nritems; + u16 flags; +} __attribute__ ((__packed__)); + +#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ + (sizeof(struct key) + sizeof(u64))) + +#define LEVEL_BITS 3 +#define MAX_LEVEL (1 << LEVEL_BITS) +#define node_level(f) ((f) & (MAX_LEVEL-1)) +#define is_leaf(f) (node_level(f) == 0) + +struct tree_buffer; +struct ctree_root { + struct tree_buffer *node; + int fp; + struct radix_tree_root cache_radix; +}; + +struct item { + struct key key; + u16 offset; + u16 size; +} __attribute__ ((__packed__)); + +#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header)) +struct leaf { + struct header header; + union { + struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + u8 data[CTREE_BLOCKSIZE-sizeof(struct header)]; + }; +} __attribute__ ((__packed__)); + +struct node { + struct header header; + struct key keys[NODEPTRS_PER_BLOCK]; + u64 blockptrs[NODEPTRS_PER_BLOCK]; +} __attribute__ ((__packed__)); + +struct ctree_path { + struct tree_buffer *nodes[MAX_LEVEL]; + int slots[MAX_LEVEL]; +}; +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 00000000000..8d51a07051d --- /dev/null +++ b/fs/btrfs/disk-io.c @@ -0,0 +1,174 @@ +#define _XOPEN_SOURCE 500 +#include +#include +#include +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +static int allocated_blocks = 0; + +struct ctree_header { + u64 root_block; +} __attribute__ ((__packed__)); + +static int get_free_block(struct ctree_root *root, u64 *block) +{ + struct stat st; + int ret; + + st.st_size = 0; + ret = fstat(root->fp, &st); + if (st.st_size > sizeof(struct ctree_header)) { + *block = (st.st_size - + sizeof(struct ctree_header)) / CTREE_BLOCKSIZE; + } else { + *block = 0; + } + ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) * + CTREE_BLOCKSIZE); + return ret; +} + +struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +{ + struct tree_buffer *buf; + int ret; + buf = malloc(sizeof(struct tree_buffer)); + if (!buf) + return buf; + allocated_blocks++; + buf->blocknr = blocknr; + buf->count = 1; + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + radix_tree_preload_end(); + if (ret) { + free(buf); + return NULL; + } + return buf; +} + +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + u64 free_block; + int ret; + struct tree_buffer * buf; + ret = get_free_block(root, &free_block); + if (ret) { + BUG(); + return NULL; + } + buf = alloc_tree_block(root, free_block); + if (!buf) + BUG(); + return buf; +} + +struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +{ + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + struct tree_buffer *buf; + int ret; + + buf = radix_tree_lookup(&root->cache_radix, blocknr); + if (buf) { + buf->count++; + if (buf->blocknr != blocknr) + BUG(); + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; + } + buf = alloc_tree_block(root, blocknr); + if (!buf) + return NULL; + ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) { + free(buf); + return NULL; + } + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; +} + +int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + u64 blocknr = buf->blocknr; + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + int ret; + + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) + return ret; + if (buf == root->node) + return update_root_block(root); + return 0; +} + +struct ctree_root *open_ctree(char *filename) +{ + struct ctree_root *root = malloc(sizeof(struct ctree_root)); + int fp; + u64 root_block; + int ret; + + fp = open(filename, O_CREAT | O_RDWR); + if (fp < 0) { + free(root); + return NULL; + } + root->fp = fp; + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + ret = pread(fp, &root_block, sizeof(u64), 0); + if (ret == sizeof(u64)) { + printf("reading root node at block %lu\n", root_block); + root->node = read_tree_block(root, root_block); + } else + root->node = NULL; + return root; +} + +int close_ctree(struct ctree_root *root) +{ + close(root->fp); + if (root->node) + tree_block_release(root, root->node); + free(root); + printf("on close %d blocks are allocated\n", allocated_blocks); + return 0; +} + +int update_root_block(struct ctree_root *root) +{ + int ret; + u64 root_block = root->node->blocknr; + + ret = pwrite(root->fp, &root_block, sizeof(u64), 0); + if (ret != sizeof(u64)) + return ret; + return 0; +} + +void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +{ + buf->count--; + if (buf->count == 0) { + if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + BUG(); + radix_tree_delete(&root->cache_radix, buf->blocknr); + memset(buf, 0, sizeof(*buf)); + free(buf); + BUG_ON(allocated_blocks == 0); + allocated_blocks--; + } +} + diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h new file mode 100644 index 00000000000..ee95fa05c4a --- /dev/null +++ b/fs/btrfs/disk-io.h @@ -0,0 +1,21 @@ +#ifndef __DISKIO__ +#define __DISKIO__ + +struct tree_buffer { + u64 blocknr; + int count; + union { + struct node node; + struct leaf leaf; + }; +}; + +struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); +int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); +struct ctree_root *open_ctree(char *filename); +int close_ctree(struct ctree_root *root); +void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); +struct tree_buffer *alloc_free_block(struct ctree_root *root); +int update_root_block(struct ctree_root *root); + +#endif diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 3a4bb4d661f..347ca06205c 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -6,6 +6,7 @@ #define BITS_PER_LONG 64 #define __GFP_BITS_SHIFT 20 #define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) +#define GFP_KERNEL 0 #define __read_mostly #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define __force diff --git a/fs/btrfs/radix-tree.c b/fs/btrfs/radix-tree.c new file mode 100644 index 00000000000..baa25ca1c2a --- /dev/null +++ b/fs/btrfs/radix-tree.c @@ -0,0 +1,836 @@ +/* + * Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * Copyright (C) 2005 SGI, Christoph Lameter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "kerncompat.h" +#include "radix-tree.h" +#ifdef __KERNEL__ +#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#else +#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ +#endif + +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) + +#define RADIX_TREE_TAG_LONGS \ + ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) + +struct radix_tree_node { + unsigned int count; + void *slots[RADIX_TREE_MAP_SIZE]; + unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; +}; + +struct radix_tree_path { + struct radix_tree_node *node; + int offset; +}; + +#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) + +static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; + +/* + * Per-cpu pool of preloaded nodes + */ +struct radix_tree_preload { + int nr; + struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; +}; +struct radix_tree_preload radix_tree_preloads = { 0, }; + +static inline gfp_t root_gfp_mask(struct radix_tree_root *root) +{ + return root->gfp_mask & __GFP_BITS_MASK; +} + +static int internal_nodes = 0; +/* + * This assumes that the caller has performed appropriate preallocation, and + * that the caller has pinned this thread of control to the current CPU. + */ +static struct radix_tree_node * +radix_tree_node_alloc(struct radix_tree_root *root) +{ + struct radix_tree_node *ret; + ret = malloc(sizeof(struct radix_tree_node)); + if (ret) { + memset(ret, 0, sizeof(struct radix_tree_node)); + internal_nodes++; + } + return ret; +} + +static inline void +radix_tree_node_free(struct radix