aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/persistent-data/dm-btree.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data/dm-btree.c')
-rw-r--r--drivers/md/persistent-data/dm-btree.c161
1 files changed, 126 insertions, 35 deletions
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index bd1e7ffbe26..416060c2570 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
/*----------------------------------------------------------------*/
/* makes the assumption that no two keys are the same. */
-static int bsearch(struct node *n, uint64_t key, int want_hi)
+static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
{
int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
@@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi)
return want_hi ? hi : lo;
}
-int lower_bound(struct node *n, uint64_t key)
+int lower_bound(struct btree_node *n, uint64_t key)
{
return bsearch(n, key, 0);
}
-void inc_children(struct dm_transaction_manager *tm, struct node *n,
+void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
unsigned i;
@@ -74,11 +74,10 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n,
dm_tm_inc(tm, value64(n, i));
else if (vt->inc)
for (i = 0; i < nr_entries; i++)
- vt->inc(vt->context,
- value_ptr(n, i, vt->size));
+ vt->inc(vt->context, value_ptr(n, i));
}
-static int insert_at(size_t value_size, struct node *node, unsigned index,
+static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
uint64_t key, void *value)
__dm_written_to_disk(value)
{
@@ -123,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
int r;
struct dm_block *b;
- struct node *n;
+ struct btree_node *n;
size_t block_size;
uint32_t max_entries;
@@ -155,13 +154,14 @@ EXPORT_SYMBOL_GPL(dm_btree_empty);
#define MAX_SPINE_DEPTH 64
struct frame {
struct dm_block *b;
- struct node *n;
+ struct btree_node *n;
unsigned level;
unsigned nr_children;
unsigned current_child;
};
struct del_stack {
+ struct dm_btree_info *info;
struct dm_transaction_manager *tm;
int top;
struct frame spine[MAX_SPINE_DEPTH];
@@ -184,6 +184,20 @@ static int unprocessed_frames(struct del_stack *s)
return s->top >= 0;
}
+static void prefetch_children(struct del_stack *s, struct frame *f)
+{
+ unsigned i;
+ struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
+
+ for (i = 0; i < f->nr_children; i++)
+ dm_bm_prefetch(bm, value64(f->n, i));
+}
+
+static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
+{
+ return f->level < (info->levels - 1);
+}
+
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
int r;
@@ -206,6 +220,7 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
dm_tm_dec(s->tm, b);
else {
+ uint32_t flags;
struct frame *f = s->spine + ++s->top;
r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
@@ -218,6 +233,10 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
f->level = level;
f->nr_children = le32_to_cpu(f->n->header.nr_entries);
f->current_child = 0;
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
+ prefetch_children(s, f);
}
return 0;
@@ -239,10 +258,11 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
+ s->info = info;
s->tm = info->tm;
s->top = -1;
- r = push_frame(s, root, 1);
+ r = push_frame(s, root, 0);
if (r)
goto out;
@@ -268,7 +288,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
if (r)
goto out;
- } else if (f->level != (info->levels - 1)) {
+ } else if (is_internal_level(info, f)) {
b = value64(f->n, f->current_child);
f->current_child++;
r = push_frame(s, b, f->level + 1);
@@ -281,9 +301,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
for (i = 0; i < f->nr_children; i++)
info->value_type.dec(info->value_type.context,
- value_ptr(f->n, i, info->value_type.size));
+ value_ptr(f->n, i));
}
- f->current_child = f->nr_children;
+ pop_frame(s);
}
}
@@ -296,7 +316,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del);
/*----------------------------------------------------------------*/
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
- int (*search_fn)(struct node *, uint64_t),
+ int (*search_fn)(struct btree_node *, uint64_t),
uint64_t *result_key, void *v, size_t value_size)
{
int i, r;
@@ -320,7 +340,7 @@ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
} while (!(flags & LEAF_NODE));
*result_key = le64_to_cpu(ro_node(s)->keys[i]);
- memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
+ memcpy(v, value_ptr(ro_node(s), i), value_size);
return 0;
}
@@ -407,7 +427,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *parent;
- struct node *ln, *rn, *pn;
+ struct btree_node *ln, *rn, *pn;
__le64 location;
left = shadow_current(s);
@@ -432,7 +452,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
sizeof(uint64_t) : s->info->value_type.size;
- memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
+ memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
size * nr_right);
/*
@@ -443,7 +463,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
pn = dm_block_data(parent);
location = cpu_to_le64(dm_block_location(left));
__dm_bless_for_disk(&location);
- memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
+ memcpy_disk(value_ptr(pn, parent_index),
&location, sizeof(__le64));
location = cpu_to_le64(dm_block_location(right));
@@ -492,7 +512,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *new_parent;
- struct node *pn, *ln, *rn;
+ struct btree_node *pn, *ln, *rn;
__le64 val;
new_parent = shadow_current(s);
@@ -529,8 +549,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
sizeof(__le64) : s->info->value_type.size;
- memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
- memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
+ memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
+ memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
nr_right * size);
/* new_parent should just point to l and r now */
@@ -545,12 +565,12 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
val = cpu_to_le64(dm_block_location(left));
__dm_bless_for_disk(&val);
pn->keys[0] = ln->keys[0];
- memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
+ memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));
val = cpu_to_le64(dm_block_location(right));
__dm_bless_for_disk(&val);
pn->keys[1] = rn->keys[0];
- memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
+ memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
/*
* rejig the spine. This is ugly, since it knows too
@@ -577,7 +597,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
uint64_t key, unsigned *index)
{
int r, i = *index, top = 1;
- struct node *node;
+ struct btree_node *node;
for (;;) {
r = shadow_step(s, root, vt);
@@ -595,7 +615,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
__dm_bless_for_disk(&location);
- memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
&location, sizeof(__le64));
}
@@ -644,7 +664,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
unsigned level, index = -1, last_level = info->levels - 1;
dm_block_t block = root;
struct shadow_spine spine;
- struct node *n;
+ struct btree_node *n;
struct dm_btree_value_type le64_type;
le64_type.context = NULL;
@@ -710,12 +730,12 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
(!info->value_type.equal ||
!info->value_type.equal(
info->value_type.context,
- value_ptr(n, index, info->value_type.size),
+ value_ptr(n, index),
value))) {
info->value_type.dec(info->value_type.context,
- value_ptr(n, index, info->value_type.size));
+ value_ptr(n, index));
}
- memcpy_disk(value_ptr(n, index, info->value_type.size),
+ memcpy_disk(value_ptr(n, index),
value, info->value_type.size);
}
@@ -750,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
/*----------------------------------------------------------------*/
-static int find_highest_key(struct ro_spine *s, dm_block_t block,
- uint64_t *result_key, dm_block_t *next_block)
+static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
+ uint64_t *result_key, dm_block_t *next_block)
{
int i, r;
uint32_t flags;
@@ -768,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
else
i--;
- *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (find_highest)
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ else
+ *result_key = le64_to_cpu(ro_node(s)->keys[0]);
+
if (next_block || flags & INTERNAL_NODE)
block = value64(ro_node(s), i);
@@ -779,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
return 0;
}
-int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
- uint64_t *result_keys)
+static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
+ bool find_highest, uint64_t *result_keys)
{
int r = 0, count = 0, level;
struct ro_spine spine;
init_ro_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
- r = find_highest_key(&spine, root, result_keys + level,
- level == info->levels - 1 ? NULL : &root);
+ r = find_key(&spine, root, find_highest, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
if (r == -ENODATA) {
r = 0;
break;
@@ -802,4 +826,71 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, true, result_keys);
+}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, false, result_keys);
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * FIXME: We shouldn't use a recursive algorithm when we have limited stack
+ * space. Also this only works for single level trees.
+ */
+static int walk_node(struct ro_spine *s, dm_block_t block,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ unsigned i, nr;
+ struct btree_node *n;
+ uint64_t keys;
+
+ r = ro_step(s, block);
+ n = ro_node(s);
+
+ nr = le32_to_cpu(n->header.nr_entries);
+ for (i = 0; i < nr; i++) {
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
+ r = walk_node(s, value64(n, i), fn, context);
+ if (r)
+ goto out;
+ } else {
+ keys = le64_to_cpu(*key_ptr(n, i));
+ r = fn(context, &keys, value_ptr(n, i));
+ if (r)
+ goto out;
+ }
+ }
+
+out:
+ ro_pop(s);
+ return r;
+}
+
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ struct ro_spine spine;
+
+ BUG_ON(info->levels > 1);
+
+ init_ro_spine(&spine, info);
+ r = walk_node(&spine, root, fn, context);
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_walk);