Diffstat (limited to 'drivers/md/bcache/bset.h')
-rw-r--r--  drivers/md/bcache/bset.h  435
1 file changed, 309 insertions(+), 126 deletions(-)
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index ae115a253d7..5f6728d5d4d 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,7 +1,11 @@
#ifndef _BCACHE_BSET_H
#define _BCACHE_BSET_H
-#include <linux/slab.h>
+#include <linux/bcache.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "util.h" /* for time_stats */
/*
* BKEYS:
@@ -142,17 +146,13 @@
* first key in that range of bytes again.
*/
-/* Btree key comparison/iteration */
+struct btree_keys;
+struct btree_iter;
+struct btree_iter_set;
+struct bkey_float;
#define MAX_BSETS 4U
-struct btree_iter {
- size_t size, used;
- struct btree_iter_set {
- struct bkey *k, *end;
- } data[MAX_BSETS];
-};
-
struct bset_tree {
/*
* We construct a binary tree in an array as if the array
@@ -162,14 +162,14 @@ struct bset_tree {
*/
/* size of the binary tree and prev array */
- unsigned size;
+ unsigned size;
/* function of size - precalculated for to_inorder() */
- unsigned extra;
+ unsigned extra;
/* copy of the last key in the set */
- struct bkey end;
- struct bkey_float *tree;
+ struct bkey end;
+ struct bkey_float *tree;
/*
* The nodes in the bset tree point to specific keys - this
@@ -179,96 +179,227 @@ struct bset_tree {
* to keep bkey_float to 4 bytes and prev isn't used in the fast
* path.
*/
- uint8_t *prev;
+ uint8_t *prev;
/* The actual btree node, with pointers to each sorted set */
- struct bset *data;
+ struct bset *data;
};
-static __always_inline int64_t bkey_cmp(const struct bkey *l,
- const struct bkey *r)
+struct btree_keys_ops {
+ bool (*sort_cmp)(struct btree_iter_set,
+ struct btree_iter_set);
+ struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *);
+ bool (*insert_fixup)(struct btree_keys *, struct bkey *,
+ struct btree_iter *, struct bkey *);
+ bool (*key_invalid)(struct btree_keys *,
+ const struct bkey *);
+ bool (*key_bad)(struct btree_keys *, const struct bkey *);
+ bool (*key_merge)(struct btree_keys *,
+ struct bkey *, struct bkey *);
+ void (*key_to_text)(char *, size_t, const struct bkey *);
+ void (*key_dump)(struct btree_keys *, const struct bkey *);
+
+ /*
+ * Only used for deciding whether to use START_KEY(k) or just the key
+ * itself in a couple places
+ */
+ bool is_extents;
+};
+
+struct btree_keys {
+ const struct btree_keys_ops *ops;
+ uint8_t page_order;
+ uint8_t nsets;
+ unsigned last_set_unwritten:1;
+ bool *expensive_debug_checks;
+
+ /*
+ * Sets of sorted keys - the real btree node - plus a binary search tree
+ *
+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+ * to the memory we have allocated for this btree node. Additionally,
+ * set[0]->data points to the entire btree node as it exists on disk.
+ */
+ struct bset_tree set[MAX_BSETS];
+};
+
+static inline struct bset_tree *bset_tree_last(struct btree_keys *b)
{
- return unlikely(KEY_INODE(l) != KEY_INODE(r))
- ? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
- : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
+ return b->set + b->nsets;
}
-static inline size_t bkey_u64s(const struct bkey *k)
+static inline bool bset_written(struct btree_keys *b, struct bset_tree *t)
{
- BUG_ON(KEY_CSUM(k) > 1);
- return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0);
+ return t <= b->set + b->nsets - b->last_set_unwritten;
}
-static inline size_t bkey_bytes(const struct bkey *k)
+static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
{
- return bkey_u64s(k) * sizeof(uint64_t);
+ return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
}
-static inline void bkey_copy(struct bkey *dest, const struct bkey *src)
+static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
{
- memcpy(dest, src, bkey_bytes(src));
+ return ((size_t) i) - ((size_t) b->set->data);
}
-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
+static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
{
- if (!src)
- src = &KEY(0, 0, 0);
+ return bset_byte_offset(b, i) >> 9;
+}
+
+#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
+#define set_bytes(i) __set_bytes(i, i->keys)
+
+#define __set_blocks(i, k, block_bytes) \
+ DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
+#define set_blocks(i, block_bytes) \
+ __set_blocks(i, (i)->keys, block_bytes)
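As a worked example of these macros (block size assumed): a bset holding 100 u64s of key data occupies sizeof(struct bset) + 800 bytes, so with 4 KiB blocks set_blocks() rounds up to a single block:

	/* Illustrative only; 4096 is an assumed block size. */
	static size_t example_blocks(struct bset *i)
	{
		/* DIV_ROUND_UP(sizeof(struct bset) + 800, 4096) == 1 */
		return __set_blocks(i, 100, 4096);
	}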
- SET_KEY_INODE(dest, KEY_INODE(src));
- SET_KEY_OFFSET(dest, KEY_OFFSET(src));
+static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
+{
+ struct bset_tree *t = bset_tree_last(b);
+
+ BUG_ON((PAGE_SIZE << b->page_order) <
+ (bset_byte_offset(b, t->data) + set_bytes(t->data)));
+
+ if (!b->last_set_unwritten)
+ return 0;
+
+ return ((PAGE_SIZE << b->page_order) -
+ (bset_byte_offset(b, t->data) + set_bytes(t->data))) /
+ sizeof(u64);
}
-static inline struct bkey *bkey_next(const struct bkey *k)
+static inline struct bset *bset_next_set(struct btree_keys *b,
+ unsigned block_bytes)
{
- uint64_t *d = (void *) k;
- return (struct bkey *) (d + bkey_u64s(k));
+ struct bset *i = bset_tree_last(b)->data;
+
+ return ((void *) i) + roundup(set_bytes(i), block_bytes);
}
-/* Keylists */
+void bch_btree_keys_free(struct btree_keys *);
+int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
+void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
+ bool *);
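A hedged sketch of the alloc/init/free lifecycle these declarations imply; the ops table and debug flag are assumed to live elsewhere:

	/* Illustrative only: my_ops and my_debug are hypothetical. */
	static int example_setup(struct btree_keys *b,
				 const struct btree_keys_ops *my_ops,
				 bool *my_debug)
	{
		int ret = bch_btree_keys_alloc(b, 0 /* page_order */, GFP_KERNEL);

		if (ret)
			return ret;

		bch_btree_keys_init(b, my_ops, my_debug);
		return 0;
	}

bch_btree_keys_free() releases what the alloc set up.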
+
+void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t);
+void bch_bset_build_written_tree(struct btree_keys *);
+void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
+bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
+void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
+unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
+ struct bkey *);
+
+enum {
+ BTREE_INSERT_STATUS_NO_INSERT = 0,
+ BTREE_INSERT_STATUS_INSERT,
+ BTREE_INSERT_STATUS_BACK_MERGE,
+ BTREE_INSERT_STATUS_OVERWROTE,
+ BTREE_INSERT_STATUS_FRONT_MERGE,
+};
-struct keylist {
- struct bkey *top;
- union {
- uint64_t *list;
- struct bkey *bottom;
- };
+/* Btree key iteration */
- /* Enough room for btree_split's keys without realloc */
-#define KEYLIST_INLINE 16
- uint64_t d[KEYLIST_INLINE];
+struct btree_iter {
+ size_t size, used;
+#ifdef CONFIG_BCACHE_DEBUG
+ struct btree_keys *b;
+#endif
+ struct btree_iter_set {
+ struct bkey *k, *end;
+ } data[MAX_BSETS];
};
-static inline void bch_keylist_init(struct keylist *l)
+typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *);
+
+struct bkey *bch_btree_iter_next(struct btree_iter *);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
+ struct btree_keys *, ptr_filter_fn);
+
+void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
+struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *,
+ struct bkey *);
+
+struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *,
+ const struct bkey *);
+
+/*
+ * Returns the first key that is strictly greater than search
+ */
+static inline struct bkey *bch_bset_search(struct btree_keys *b,
+ struct bset_tree *t,
+ const struct bkey *search)
{
- l->top = (void *) (l->list = l->d);
+ return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-static inline void bch_keylist_push(struct keylist *l)
+#define for_each_key_filter(b, k, iter, filter) \
+ for (bch_btree_iter_init((b), (iter), NULL); \
+ ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+
+#define for_each_key(b, k, iter) \
+ for (bch_btree_iter_init((b), (iter), NULL); \
+ ((k) = bch_btree_iter_next(iter));)
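A minimal usage sketch for the iterator macros, counting the keys that survive a filter; bch_ptr_bad is the inline wrapper defined later in this header, and the filter skips any key it returns true for:

	static unsigned example_count_good(struct btree_keys *b)
	{
		struct btree_iter iter;
		struct bkey *k;
		unsigned ret = 0;

		for_each_key_filter(b, k, &iter, bch_ptr_bad)
			ret++;

		return ret;
	}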
+
+/* Sorting */
+
+struct bset_sort_state {
+ mempool_t *pool;
+
+ unsigned page_order;
+ unsigned crit_factor;
+
+ struct time_stats time;
+};
+
+void bch_bset_sort_state_free(struct bset_sort_state *);
+int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
+void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
+void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
+ struct bset_sort_state *);
+void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
+ struct bset_sort_state *);
+void bch_btree_sort_partial(struct btree_keys *, unsigned,
+ struct bset_sort_state *);
+
+static inline void bch_btree_sort(struct btree_keys *b,
+ struct bset_sort_state *state)
{
- l->top = bkey_next(l->top);
+ bch_btree_sort_partial(b, 0, state);
}
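A hedged sketch of pairing a bset_sort_state with a full sort (error handling kept minimal; reusing the node's own page_order is an assumption):

	static void example_sort(struct btree_keys *b)
	{
		struct bset_sort_state state;

		if (bch_bset_sort_state_init(&state, b->page_order))
			return;			/* mempool setup failed */

		bch_btree_sort(b, &state);	/* sorts from set 0, per the inline above */
		bch_bset_sort_state_free(&state);
	}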
-static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
+struct bset_stats {
+ size_t sets_written, sets_unwritten;
+ size_t bytes_written, bytes_unwritten;
+ size_t floats, failed;
+};
+
+void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
+
+/* Bkey utility code */
+
+#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
+
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
{
- bkey_copy(l->top, k);
- bch_keylist_push(l);
+ return bkey_idx(i->start, idx);
}
-static inline bool bch_keylist_empty(struct keylist *l)
+static inline void bkey_init(struct bkey *k)
{
- return l->top == (void *) l->list;
+ *k = ZERO_KEY;
}
-static inline void bch_keylist_free(struct keylist *l)
+static __always_inline int64_t bkey_cmp(const struct bkey *l,
+ const struct bkey *r)
{
- if (l->list != l->d)
- kfree(l->list);
+ return unlikely(KEY_INODE(l) != KEY_INODE(r))
+ ? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
+ : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
-void bch_keylist_copy(struct keylist *, struct keylist *);
-struct bkey *bch_keylist_pop(struct keylist *);
-int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
-
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
unsigned);
bool __bch_cut_front(const struct bkey *, struct bkey *);
@@ -286,98 +417,150 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
return __bch_cut_back(where, k);
}
-const char *bch_ptr_status(struct cache_set *, const struct bkey *);
-bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *);
-bool bch_ptr_bad(struct btree *, const struct bkey *);
-
-static inline uint8_t gen_after(uint8_t a, uint8_t b)
+#define PRECEDING_KEY(_k) \
+({ \
+ struct bkey *_ret = NULL; \
+ \
+ if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
+ _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
+ \
+ if (!_ret->low) \
+ _ret->high--; \
+ _ret->low--; \
+ } \
+ \
+ _ret; \
+})
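Worked example (not from this patch): for a key at inode 1, offset 100, the macro yields a zero-size key comparing as (1, 99) — low holds the offset and high absorbs the borrow when the offset is 0 — so an iterator started there covers everything sorting at or after (1, 100); for the all-zero key it yields NULL. Note the result points into a compound literal inside the statement expression, so it should only be consumed immediately, in the style of:

	/* Illustrative; inode 1 and offset 100 are assumed values. */
	bch_btree_iter_init(b, &iter, PRECEDING_KEY(&KEY(1, 100, 0)));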
+
+static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
{
- uint8_t r = a - b;
- return r > 128U ? 0 : r;
+ return b->ops->key_invalid(b, k);
}
-static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
- unsigned i)
+static inline bool bch_ptr_bad(struct btree_keys *b, const struct bkey *k)
{
- return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
+ return b->ops->key_bad(b, k);
}
-static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
- unsigned i)
+static inline void bch_bkey_to_text(struct btree_keys *b, char *buf,
+ size_t size, const struct bkey *k)
{
- return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+ return b->ops->key_to_text(buf, size, k);
}
+static inline bool bch_bkey_equal_header(const struct bkey *l,
+ const struct bkey *r)
+{
+ return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
+ KEY_PTRS(l) == KEY_PTRS(r) &&
+ KEY_CSUM(l) == KEY_CSUM(r));
+}
-typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
+/* Keylists */
-struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
- struct btree *, ptr_filter_fn);
+struct keylist {
+ union {
+ struct bkey *keys;
+ uint64_t *keys_p;
+ };
+ union {
+ struct bkey *top;
+ uint64_t *top_p;
+ };
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
- struct bkey *, struct bset_tree *);
+ /* Enough room for btree_split's keys without realloc */
+#define KEYLIST_INLINE 16
+ uint64_t inline_keys[KEYLIST_INLINE];
+};
-/* 32 bits total: */
-#define BKEY_MID_BITS 3
-#define BKEY_EXPONENT_BITS 7
-#define BKEY_MANTISSA_BITS 22
-#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
+static inline void bch_keylist_init(struct keylist *l)
+{
+ l->top_p = l->keys_p = l->inline_keys;
+}
-struct bkey_float {
- unsigned exponent:BKEY_EXPONENT_BITS;
- unsigned m:BKEY_MID_BITS;
- unsigned mantissa:BKEY_MANTISSA_BITS;
-} __packed;
+static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
+{
+ l->keys = k;
+ l->top = bkey_next(k);
+}
-/*
- * BSET_CACHELINE was originally intended to match the hardware cacheline size -
- * it used to be 64, but I realized the lookup code would touch slightly less
- * memory if it was 128.
- *
- * It defines the number of bytes (in struct bset) per struct bkey_float in
- * the auxiliary search tree - when we're done searching the bset_float tree we
- * have this many bytes left that we do a linear search over.
- *
- * Since (after level 5) every level of the bset_tree is on a new cacheline,
- * we're touching one fewer cacheline in the bset tree in exchange for one more
- * cacheline in the linear search - but the linear search might stop before it
- * gets to the second cacheline.
- */
+static inline void bch_keylist_push(struct keylist *l)
+{
+ l->top = bkey_next(l->top);
+}
-#define BSET_CACHELINE 128
-#define bset_tree_space(b) (btree_data_space(b) / BSET_CACHELINE)
+static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
+{
+ bkey_copy(l->top, k);
+ bch_keylist_push(l);
+}
-#define bset_tree_bytes(b) (bset_tree_space(b) * sizeof(struct bkey_float))
-#define bset_prev_bytes(b) (bset_tree_space(b) * sizeof(uint8_t))
+static inline bool bch_keylist_empty(struct keylist *l)
+{
+ return l->top == l->keys;
+}
-void bch_bset_init_next(struct btree *);
+static inline void bch_keylist_reset(struct keylist *l)
+{
+ l->top = l->keys;
+}
-void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
-void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
+static inline void bch_keylist_free(struct keylist *l)
+{
+ if (l->keys_p != l->inline_keys)
+ kfree(l->keys_p);
+}
-struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
- const struct bkey *);
+static inline size_t bch_keylist_nkeys(struct keylist *l)
+{
+ return l->top_p - l->keys_p;
+}
-static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
- const struct bkey *search)
+static inline size_t bch_keylist_bytes(struct keylist *l)
{
- return search ? __bch_bset_search(b, t, search) : t->data->start;
+ return bch_keylist_nkeys(l) * sizeof(uint64_t);
}
-bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
-void bch_btree_sort_lazy(struct btree *);
-void bch_btree_sort_into(struct btree *, struct btree *);
-void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
-void bch_btree_sort_partial(struct btree *, unsigned);
+struct bkey *bch_keylist_pop(struct keylist *);
+void bch_keylist_pop_front(struct keylist *);
+int __bch_keylist_realloc(struct keylist *, unsigned);
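A hedged sketch of the keylist lifecycle implied by the helpers above (the source key and error handling are assumed):

	/* Illustrative only: append one key, then drain the list. */
	static int example_keylist(struct bkey *src)
	{
		struct keylist kl;

		bch_keylist_init(&kl);

		if (__bch_keylist_realloc(&kl, bkey_u64s(src)))
			return -ENOMEM;		/* couldn't grow past inline_keys */

		bch_keylist_add(&kl, src);

		while (!bch_keylist_empty(&kl))
			bch_keylist_pop_front(&kl);

		bch_keylist_free(&kl);
		return 0;
	}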
+
+/* Debug stuff */
+
+#ifdef CONFIG_BCACHE_DEBUG
+
+int __bch_count_data(struct btree_keys *);
+void __bch_check_keys(struct btree_keys *, const char *, ...);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bucket(struct btree_keys *);
+
+#else
+
+static inline int __bch_count_data(struct btree_keys *b) { return -1; }
+static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void bch_dump_bucket(struct btree_keys *b) {}
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+
+#endif
+
+static inline bool btree_keys_expensive_checks(struct btree_keys *b)
+{
+#ifdef CONFIG_BCACHE_DEBUG
+ return *b->expensive_debug_checks;
+#else
+ return false;
+#endif
+}
-static inline void bch_btree_sort(struct btree *b)
+static inline int bch_count_data(struct btree_keys *b)
{
- bch_btree_sort_partial(b, 0);
+ return btree_keys_expensive_checks(b) ? __bch_count_data(b) : -1;
}
-int bch_bset_print_stats(struct cache_set *, char *);
+#define bch_check_keys(b, ...) \
+do { \
+ if (btree_keys_expensive_checks(b)) \
+ __bch_check_keys(b, __VA_ARGS__); \
+} while (0)
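A small usage sketch for the debug hooks; the format string is assumed:

	static void example_verify(struct btree_keys *b)
	{
		int sectors = bch_count_data(b);	/* -1 when checks are off */

		bch_check_keys(b, "%i sectors of data", sectors);
	}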
#endif