From 85cbe1f88cb189322e3e4ef98816c19ab12161ea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Feb 2014 13:44:06 -0800 Subject: bcache: Fix another compiler warning on m68k Use a bigger hammer this time Signed-off-by: Kent Overstreet Cc: linux-stable --- drivers/md/bcache/bset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 3f74b4b0747..54541641530 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) for (k = i->start; k < bset_bkey_last(i); k = next) { next = bkey_next(k); - printk(KERN_ERR "block %u key %li/%u: ", set, - (uint64_t *) k - i->d, i->keys); + printk(KERN_ERR "block %u key %u/%u: ", set, + (unsigned) ((u64 *) k - i->d), i->keys); if (b->ops->key_dump) b->ops->key_dump(b, k); -- cgit v1.2.3-18-g5258 From 1b4eaf3d3809a658c85911e92d9ff64086931efa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Jan 2014 15:04:18 -0800 Subject: bcache: Fix flash_dev_cache_miss() for real this time The code was using sectors to count the number of sectors it was zeroing... but then it passed it to bio_advance()... after it had been set to 0. Amusing... 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/request.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 5d5d031cf38..fc14ba3f6d0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1203,22 +1203,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { - struct bio_vec bv; - struct bvec_iter iter; - - /* Zero fill bio */ - - bio_for_each_segment(bv, bio, iter) { - unsigned j = min(bv.bv_len >> 9, sectors); - - void *p = kmap(bv.bv_page); - memset(p + bv.bv_offset, 0, j << 9); - kunmap(bv.bv_page); + unsigned bytes = min(sectors, bio_sectors(bio)) << 9; - sectors -= j; - } + swap(bio->bi_iter.bi_size, bytes); + zero_fill_bio(bio); + swap(bio->bi_iter.bi_size, bytes); - bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size)); + bio_advance(bio, bytes); if (!bio->bi_iter.bi_size) return MAP_DONE; -- cgit v1.2.3-18-g5258 From dabb44334060b4b84051b34c58573e57cc7432b2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Feb 2014 19:48:26 -0800 Subject: bcache: Fix a shutdown bug Shutdown wasn't cancelling/waiting on journal_write_work() Signed-off-by: Kent Overstreet --- drivers/md/bcache/journal.c | 9 +++++++-- drivers/md/bcache/journal.h | 1 + drivers/md/bcache/super.c | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 18039affc30..e38c5997bf1 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -536,6 +536,7 @@ void bch_journal_next(struct journal *j) atomic_set(&fifo_back(&j->pin), 1); j->cur->data->seq = ++j->seq; + j->cur->dirty = false; j->cur->need_write = false; j->cur->data->keys = 0; @@ -731,7 +732,10 @@ static void journal_write_work(struct 
work_struct *work) struct cache_set, journal.work); spin_lock(&c->journal.lock); - journal_try_write(c); + if (c->journal.cur->dirty) + journal_try_write(c); + else + spin_unlock(&c->journal.lock); } /* @@ -761,7 +765,8 @@ atomic_t *bch_journal(struct cache_set *c, if (parent) { closure_wait(&w->wait, parent); journal_try_write(c); - } else if (!w->need_write) { + } else if (!w->dirty) { + w->dirty = true; schedule_delayed_work(&c->journal.work, msecs_to_jiffies(c->journal_delay_ms)); spin_unlock(&c->journal.lock); diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index 9180c446507..e3c39457afb 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -95,6 +95,7 @@ struct journal_write { struct cache_set *c; struct closure_waitlist wait; + bool dirty; bool need_write; }; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 24a3a1546ca..c70521fe57a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1403,6 +1403,10 @@ static void cache_set_flush(struct closure *cl) if (ca->alloc_thread) kthread_stop(ca->alloc_thread); + cancel_delayed_work_sync(&c->journal.work); + /* flush last journal entry if needed */ + c->journal.work.work.func(&c->journal.work.work); + closure_return(cl); } -- cgit v1.2.3-18-g5258 From 4fa03402cda2fac1a54248c7578b939d95931dc0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Mar 2014 18:58:55 -0700 Subject: bcache: Fix a lockdep splat in an error path Signed-off-by: Kent Overstreet --- drivers/md/bcache/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c70521fe57a..5136e11eadb 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1873,7 +1873,10 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) goto err; + 
mutex_lock(&bch_register_lock); err = register_cache_set(ca); + mutex_unlock(&bch_register_lock); + if (err) goto err; @@ -1935,8 +1938,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!try_module_get(THIS_MODULE)) return -EBUSY; - mutex_lock(&bch_register_lock); - if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) goto err; @@ -1969,7 +1970,9 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!dc) goto err_close; + mutex_lock(&bch_register_lock); register_bdev(sb, sb_page, bdev, dc); + mutex_unlock(&bch_register_lock); } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) @@ -1982,7 +1985,6 @@ out: put_page(sb_page); kfree(sb); kfree(path); - mutex_unlock(&bch_register_lock); module_put(THIS_MODULE); return ret; -- cgit v1.2.3-18-g5258 From 65ddf45a3102916fb622c71f7af158b19d49dc7f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 24 Feb 2014 19:55:28 -0800 Subject: bcache: Fix a null ptr deref in journal replay Signed-off-by: Kent Overstreet --- drivers/md/bcache/journal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index e38c5997bf1..97e6a92da99 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -287,9 +287,13 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) k < bset_bkey_last(&i->j); k = bkey_next(k)) { unsigned j; + struct bucket *g; for (j = 0; j < KEY_PTRS(k); j++) { - struct bucket *g = PTR_BUCKET(c, k, j); + if (!ptr_available(c, k, j)) + continue; + + g = PTR_BUCKET(c, k, j); atomic_inc(&g->pin); if (g->prio == BTREE_PRIO && -- cgit v1.2.3-18-g5258 From 27201cfdaa2aeb571191494c1bae6863ffb04108 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2014 13:44:21 -0700 Subject: bcache: Fix a journalling reclaim after recovery bug On recovery 
we weren't correctly keeping track of what journal buckets had open journal entries, thus it was possible for them to be overwritten until we'd written all new journal entries. Signed-off-by: Kent Overstreet --- drivers/md/bcache/journal.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 97e6a92da99..4152a911989 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -237,8 +237,14 @@ bsearch: for (i = 0; i < ca->sb.njournal_buckets; i++) if (ja->seq[i] > seq) { seq = ja->seq[i]; - ja->cur_idx = ja->discard_idx = - ja->last_idx = i; + /* + * When journal_reclaim() goes to allocate for + * the first time, it'll use the bucket after + * ja->cur_idx + */ + ja->cur_idx = i; + ja->last_idx = ja->discard_idx = (i + 1) % + ca->sb.njournal_buckets; } } -- cgit v1.2.3-18-g5258 From 0bd143fd800055b1db756693289bbebdb93f2a73 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Mar 2014 17:56:24 -0800 Subject: bcache: Fix a bug recovering from unclean shutdown The code to fixup incorrect bucket prios incorrectly did not skip btree node freeing keys Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5f9c2a665ca..2d4a864865e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1726,9 +1726,9 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, !ptr_stale(b->c, k, i)) { g->gen = PTR_GEN(k, i); - if (b->level) + if (b->level && bkey_cmp(k, &ZERO_KEY)) g->prio = BTREE_PRIO; - else if (g->prio == BTREE_PRIO) + else if (!b->level && g->prio == BTREE_PRIO) g->prio = INITIAL_PRIO; } } -- cgit v1.2.3-18-g5258 From 487dded86ea065317aea121bec8f1816f2f235c9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Mar 2014 15:13:26 
-0700 Subject: bcache: Fix another bug recovering from unclean shutdown The on disk bucket gens are allowed to be out of date, when we reuse buckets that didn't have any live data in them. To deal with this, the initial gc has to update the bucket gen when we find a pointer gen newer than the bucket's gen. Unfortunately we weren't doing this for pointers in the journal that we're about to replay. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 82 +++++++++++++++++---------------------------- drivers/md/bcache/btree.h | 2 +- drivers/md/bcache/journal.c | 17 +++------- 3 files changed, 36 insertions(+), 65 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2d4a864865e..5f587ce57e3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1126,7 +1126,8 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op) /* Garbage collection */ -uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) +static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, + struct bkey *k) { uint8_t stale = 0; unsigned i; @@ -1177,6 +1178,26 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) #define btree_mark_key(b, k) __bch_btree_mark_key(b->c, b->level, k) +void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) + if (ptr_available(c, k, i) && + !ptr_stale(c, k, i)) { + struct bucket *b = PTR_BUCKET(c, k, i); + + b->gen = PTR_GEN(k, i); + + if (level && bkey_cmp(k, &ZERO_KEY)) + b->prio = BTREE_PRIO; + else if (!level && b->prio == BTREE_PRIO) + b->prio = INITIAL_PRIO; + } + + __bch_btree_mark_key(c, level, k); +} + static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) { uint8_t stale = 0; @@ -1511,6 +1532,8 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, } } + __bch_btree_mark_key(b->c, b->level + 1, &b->key); + if 
(b->level) { ret = btree_gc_recurse(b, op, writes, gc); if (ret) @@ -1561,11 +1584,6 @@ size_t bch_btree_gc_finish(struct cache_set *c) c->gc_mark_valid = 1; c->need_gc = 0; - if (c->root) - for (i = 0; i < KEY_PTRS(&c->root->key); i++) - SET_GC_MARK(PTR_BUCKET(c, &c->root->key, i), - GC_MARK_METADATA); - for (i = 0; i < KEY_PTRS(&c->uuid_bucket); i++) SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i), GC_MARK_METADATA); @@ -1705,36 +1723,16 @@ int bch_gc_thread_start(struct cache_set *c) /* Initial partial gc */ -static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, - unsigned long **seen) +static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; - unsigned i; struct bkey *k, *p = NULL; - struct bucket *g; struct btree_iter iter; - for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { - for (i = 0; i < KEY_PTRS(k); i++) { - if (!ptr_available(b->c, k, i)) - continue; - - g = PTR_BUCKET(b->c, k, i); - - if (!__test_and_set_bit(PTR_BUCKET_NR(b->c, k, i), - seen[PTR_DEV(k, i)]) || - !ptr_stale(b->c, k, i)) { - g->gen = PTR_GEN(k, i); - - if (b->level && bkey_cmp(k, &ZERO_KEY)) - g->prio = BTREE_PRIO; - else if (!b->level && g->prio == BTREE_PRIO) - g->prio = INITIAL_PRIO; - } - } + for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(b->c, b->level, k); - btree_mark_key(b, k); - } + bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { bch_btree_iter_init(&b->keys, &iter, NULL); @@ -1746,40 +1744,22 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, btree_node_prefetch(b->c, k, b->level - 1); if (p) - ret = btree(check_recurse, p, b, op, seen); + ret = btree(check_recurse, p, b, op); p = k; } while (p && !ret); } - return 0; + return ret; } int bch_btree_check(struct cache_set *c) { - int ret = -ENOMEM; - unsigned i; - unsigned long *seen[MAX_CACHES_PER_SET]; struct btree_op op; - memset(seen, 0, sizeof(seen)); bch_btree_op_init(&op, SHRT_MAX); - for (i 
= 0; c->cache[i]; i++) { - size_t n = DIV_ROUND_UP(c->cache[i]->sb.nbuckets, 8); - seen[i] = kmalloc(n, GFP_KERNEL); - if (!seen[i]) - goto err; - - /* Disables the seen array until prio_read() uses it too */ - memset(seen[i], 0xFF, n); - } - - ret = btree_root(check_recurse, c, &op, seen); -err: - for (i = 0; i < MAX_CACHES_PER_SET; i++) - kfree(seen[i]); - return ret; + return btree_root(check_recurse, c, &op); } /* Btree insertion */ diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index af065e97e55..def9dc4a822 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -251,7 +251,7 @@ int bch_gc_thread_start(struct cache_set *); size_t bch_btree_gc_finish(struct cache_set *); void bch_moving_gc(struct cache_set *); int bch_btree_check(struct cache_set *); -uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); +void bch_initial_mark_key(struct cache_set *, int, struct bkey *); static inline void wake_up_gc(struct cache_set *c) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 4152a911989..cf8e0932aad 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -293,21 +293,12 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) k < bset_bkey_last(&i->j); k = bkey_next(k)) { unsigned j; - struct bucket *g; - for (j = 0; j < KEY_PTRS(k); j++) { - if (!ptr_available(c, k, j)) - continue; + for (j = 0; j < KEY_PTRS(k); j++) + if (ptr_available(c, k, j)) + atomic_inc(&PTR_BUCKET(c, k, j)->pin); - g = PTR_BUCKET(c, k, j); - atomic_inc(&g->pin); - - if (g->prio == BTREE_PRIO && - !ptr_stale(c, k, j)) - g->prio = INITIAL_PRIO; - } - - __bch_btree_mark_key(c, 0, k); + bch_initial_mark_key(c, 0, k); } } } -- cgit v1.2.3-18-g5258 From 90db6919f5f1614d1b7a92052445506bc6c564d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Feb 2014 17:26:40 -0800 Subject: bcache: Fix discard granularity blk_stack_limits() doesn't like a discard granularity of 0. 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 5136e11eadb..fb343276bee 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -843,6 +843,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, q->limits.max_segment_size = UINT_MAX; q->limits.max_segments = BIO_MAX_PAGES; q->limits.max_discard_sectors = UINT_MAX; + q->limits.discard_granularity = 512; q->limits.io_min = block_size; q->limits.logical_block_size = block_size; q->limits.physical_block_size = block_size; -- cgit v1.2.3-18-g5258 From da415a096fc06e49d1a15f7a06bcfe6ad44c5d38 Mon Sep 17 00:00:00 2001 From: Nicholas Swenson Date: Thu, 9 Jan 2014 16:03:04 -0800 Subject: bcache: Fix moving_gc deadlocking with a foreground write Deadlock happened because a foreground write slept, waiting for a bucket to be allocated. Normally the gc would mark buckets available for invalidation. But the moving_gc was stuck waiting for outstanding writes to complete. These writes used the bcache_wq, the same queue foreground writes used. This fix gives moving_gc its own work queue, so it can still finish moving even if foreground writes are stuck waiting for allocation. It also makes work queue a parameter to the data_insert path, so moving_gc can use its workqueue for writes. 
Signed-off-by: Nicholas Swenson Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 ++ drivers/md/bcache/movinggc.c | 5 +++-- drivers/md/bcache/request.c | 13 +++++++------ drivers/md/bcache/request.h | 1 + drivers/md/bcache/super.c | 3 +++ 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index a4c7306ff43..6d814f463d9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -628,6 +628,8 @@ struct cache_set { /* Number of moving GC bios in flight */ struct semaphore moving_in_flight; + struct workqueue_struct *moving_gc_wq; + struct btree *root; #ifdef CONFIG_BCACHE_DEBUG diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 9eb60d102de..8c7205186d0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -115,7 +115,7 @@ static void write_moving(struct closure *cl) closure_call(&op->cl, bch_data_insert, NULL, cl); } - continue_at(cl, write_moving_finish, system_wq); + continue_at(cl, write_moving_finish, op->wq); } static void read_moving_submit(struct closure *cl) @@ -125,7 +125,7 @@ static void read_moving_submit(struct closure *cl) bch_submit_bbio(bio, io->op.c, &io->w->key, 0); - continue_at(cl, write_moving, system_wq); + continue_at(cl, write_moving, io->op.wq); } static void read_moving(struct cache_set *c) @@ -160,6 +160,7 @@ static void read_moving(struct cache_set *c) io->w = w; io->op.inode = KEY_INODE(&w->key); io->op.c = c; + io->op.wq = c->moving_gc_wq; moving_init(io); bio = &io->bio.bio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fc14ba3f6d0..3e880869871 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -248,7 +248,7 @@ static void bch_data_insert_keys(struct closure *cl) atomic_dec_bug(journal_ref); if (!op->insert_data_done) - continue_at(cl, bch_data_insert_start, bcache_wq); + continue_at(cl, 
bch_data_insert_start, op->wq); bch_keylist_free(&op->insert_keys); closure_return(cl); @@ -297,7 +297,7 @@ static void bch_data_invalidate(struct closure *cl) op->insert_data_done = true; bio_put(bio); out: - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); } static void bch_data_insert_error(struct closure *cl) @@ -340,7 +340,7 @@ static void bch_data_insert_endio(struct bio *bio, int error) if (op->writeback) op->error = error; else if (!op->replace) - set_closure_fn(cl, bch_data_insert_error, bcache_wq); + set_closure_fn(cl, bch_data_insert_error, op->wq); else set_closure_fn(cl, NULL, NULL); } @@ -376,7 +376,7 @@ static void bch_data_insert_start(struct closure *cl) if (bch_keylist_realloc(&op->insert_keys, 3 + (op->csum ? 1 : 0), op->c)) - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); k = op->insert_keys.top; bkey_init(k); @@ -413,7 +413,7 @@ static void bch_data_insert_start(struct closure *cl) } while (n != bio); op->insert_data_done = true; - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); err: /* bch_alloc_sectors() blocks if s->writeback = true */ BUG_ON(op->writeback); @@ -442,7 +442,7 @@ err: bio_put(bio); if (!bch_keylist_empty(&op->insert_keys)) - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); else closure_return(cl); } @@ -824,6 +824,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.error = 0; s->iop.flags = 0; s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; + s->iop.wq = bcache_wq; return s; } diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 39f21dbedc3..c117c4082aa 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -7,6 +7,7 @@ struct data_insert_op { struct closure cl; struct cache_set *c; struct bio *bio; + struct workqueue_struct *wq; unsigned inode; 
uint16_t write_point; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fb343276bee..ddfde380b49 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1356,6 +1356,8 @@ static void cache_set_free(struct closure *cl) bch_bset_sort_state_free(&c->sort); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); + if (c->moving_gc_wq) + destroy_workqueue(c->moving_gc_wq); if (c->bio_split) bioset_free(c->bio_split); if (c->fill_iter) @@ -1522,6 +1524,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || + !(c->moving_gc_wq = create_workqueue("bcache_gc")) || bch_journal_alloc(c) || bch_btree_cache_alloc(c) || bch_open_buckets_alloc(c) || -- cgit v1.2.3-18-g5258 From 10d9dcf6ee5909e1aabd3685c60fdd1b1306d046 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Feb 2014 15:48:36 -0800 Subject: bcache: Fix moving_pred() Avoid a potential null pointer deref (e.g. 
from check keys for cache misses) Signed-off-by: Kent Overstreet --- drivers/md/bcache/movinggc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 8c7205186d0..5e8e58701d3 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -24,12 +24,10 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) moving_gc_keys); unsigned i; - for (i = 0; i < KEY_PTRS(k); i++) { - struct bucket *g = PTR_BUCKET(c, k, i); - - if (GC_MOVE(g)) + for (i = 0; i < KEY_PTRS(k); i++) + if (ptr_available(c, k, i) && + GC_MOVE(PTR_BUCKET(c, k, i))) return true; - } return false; } -- cgit v1.2.3-18-g5258 From 3f6ef38110b6955327fea3105f004a3b61a3f65f Mon Sep 17 00:00:00 2001 From: Nicholas Swenson Date: Thu, 23 Jan 2014 15:21:02 -0800 Subject: bcache: stop moving_gc marking buckets that can't be moved. Signed-off-by: Nicholas Swenson --- drivers/md/bcache/movinggc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 5e8e58701d3..cd7490311e5 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -215,7 +215,10 @@ void bch_moving_gc(struct cache_set *c) ca->heap.used = 0; for_each_bucket(b, ca) { - if (!GC_SECTORS_USED(b)) + if (GC_MARK(b) == GC_MARK_METADATA || + !GC_SECTORS_USED(b) || + GC_SECTORS_USED(b) == ca->sb.bucket_size || + atomic_read(&b->pin)) continue; if (!heap_full(&ca->heap)) { -- cgit v1.2.3-18-g5258 From 3f5e0a34daed197aa55d0c6b466bb4cd03babb4f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Jan 2014 04:42:58 -0800 Subject: bcache: Kill dead cgroup code This hasn't been used or even enabled in ages. 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/Kconfig | 8 --- drivers/md/bcache/btree.c | 4 -- drivers/md/bcache/request.c | 169 -------------------------------------------- drivers/md/bcache/request.h | 18 ----- drivers/md/bcache/stats.c | 3 - 5 files changed, 202 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 2638417b19a..4d200883c50 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG Keeps all active closures in a linked list and provides a debugfs interface to list them, which makes it possible to see asynchronous operations that get stuck. - -# cgroup code needs to be updated: -# -#config CGROUP_BCACHE -# bool "Cgroup controls for bcache" -# depends on BCACHE && BLK_CGROUP -# ---help--- -# TODO diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5f587ce57e3..ea5a59e2d74 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -68,15 +68,11 @@ * alloc_bucket() cannot fail. This should be true but is not completely * obvious. * - * Make sure all allocations get charged to the root cgroup - * * Plugging? 
* * If data write is less than hard sector size of ssd, round up offset in open * bucket to the next whole sector * - * Also lookup by cgroup in get_open_bucket() - * * Superblock needs to be fleshed out for multiple cache devices * * Add a sysfs tunable for the number of writeback IOs in flight diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3e880869871..15fff4f68a7 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -12,11 +12,9 @@ #include "request.h" #include "writeback.h" -#include #include #include #include -#include "blk-cgroup.h" #include @@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache; static void bch_data_insert_start(struct closure *); -/* Cgroup interface */ - -#ifdef CONFIG_CGROUP_BCACHE -static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; - -static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) -{ - struct cgroup_subsys_state *css; - return cgroup && - (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) - ? container_of(css, struct bch_cgroup, css) - : &bcache_default_cgroup; -} - -struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) -{ - struct cgroup_subsys_state *css = bio->bi_css - ? cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id) - : task_subsys_state(current, bcache_subsys_id); - - return css - ? 
container_of(css, struct bch_cgroup, css) - : &bcache_default_cgroup; -} - -static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, loff_t *ppos) -{ - char tmp[1024]; - int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, - cgroup_to_bcache(cgrp)->cache_mode + 1); - - if (len < 0) - return len; - - return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); -} - -static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - int v = bch_read_string_list(buf, bch_cache_modes); - if (v < 0) - return v; - - cgroup_to_bcache(cgrp)->cache_mode = v - 1; - return 0; -} - -static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft) -{ - return cgroup_to_bcache(cgrp)->verify; -} - -static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val) -{ - cgroup_to_bcache(cgrp)->verify = val; - return 0; -} - -static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_hits); -} - -static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_misses); -} - -static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp, - struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_bypass_hits); -} - -static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp, - struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_bypass_misses); -} - -static struct cftype bch_files[] = { - { - .name = "cache_mode", - .read = cache_mode_read, - .write_string = cache_mode_write, - }, - { - .name = "verify", - .read_u64 = bch_verify_read, - .write_u64 = bch_verify_write, - }, - { - .name = "cache_hits", - 
.read_u64 = bch_cache_hits_read, - }, - { - .name = "cache_misses", - .read_u64 = bch_cache_misses_read, - }, - { - .name = "cache_bypass_hits", - .read_u64 = bch_cache_bypass_hits_read, - }, - { - .name = "cache_bypass_misses", - .read_u64 = bch_cache_bypass_misses_read, - }, - { } /* terminate */ -}; - -static void init_bch_cgroup(struct bch_cgroup *cg) -{ - cg->cache_mode = -1; -} - -static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) -{ - struct bch_cgroup *cg; - - cg = kzalloc(sizeof(*cg), GFP_KERNEL); - if (!cg) - return ERR_PTR(-ENOMEM); - init_bch_cgroup(cg); - return &cg->css; -} - -static void bcachecg_destroy(struct cgroup *cgroup) -{ - struct bch_cgroup *cg = cgroup_to_bcache(cgroup); - kfree(cg); -} - -struct cgroup_subsys bcache_subsys = { - .create = bcachecg_create, - .destroy = bcachecg_destroy, - .subsys_id = bcache_subsys_id, - .name = "bcache", - .module = THIS_MODULE, -}; -EXPORT_SYMBOL_GPL(bcache_subsys); -#endif - static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) { -#ifdef CONFIG_CGROUP_BCACHE - int r = bch_bio_to_cgroup(bio)->cache_mode; - if (r >= 0) - return r; -#endif return BDEV_CACHE_MODE(&dc->sb); } static bool verify(struct cached_dev *dc, struct bio *bio) { -#ifdef CONFIG_CGROUP_BCACHE - if (bch_bio_to_cgroup(bio)->verify) - return true; -#endif return dc->verify; } @@ -1305,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d) void bch_request_exit(void) { -#ifdef CONFIG_CGROUP_BCACHE - cgroup_unload_subsys(&bcache_subsys); -#endif if (bch_search_cache) kmem_cache_destroy(bch_search_cache); } @@ -1318,11 +1155,5 @@ int __init bch_request_init(void) if (!bch_search_cache) return -ENOMEM; -#ifdef CONFIG_CGROUP_BCACHE - cgroup_load_subsys(&bcache_subsys); - init_bch_cgroup(&bcache_default_cgroup); - - cgroup_add_cftypes(&bcache_subsys, bch_files); -#endif return 0; } diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index c117c4082aa..1ff36875c2b 100644 --- 
a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -1,8 +1,6 @@ #ifndef _BCACHE_REQUEST_H_ #define _BCACHE_REQUEST_H_ -#include - struct data_insert_op { struct closure cl; struct cache_set *c; @@ -42,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d); extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache; -struct bch_cgroup { -#ifdef CONFIG_CGROUP_BCACHE - struct cgroup_subsys_state css; -#endif - /* - * We subtract one from the index into bch_cache_modes[], so that - * default == -1; this makes it so the rest match up with d->cache_mode, - * and we use d->cache_mode if cgrp->cache_mode < 0 - */ - short cache_mode; - bool verify; - struct cache_stat_collector stats; -}; - -struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio); - #endif /* _BCACHE_REQUEST_H_ */ diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 84d0782f702..0ca072c20d0 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, struct cached_dev *dc = container_of(d, struct cached_dev, disk); mark_cache_stats(&dc->accounting.collector, hit, bypass); mark_cache_stats(&c->accounting.collector, hit, bypass); -#ifdef CONFIG_CGROUP_BCACHE - mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass); -#endif } void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) -- cgit v1.2.3-18-g5258 From 7159b1ad3dded9da040b5c608acf3d52d50f661e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Feb 2014 18:43:32 -0800 Subject: bcache: Better alloc tracepoints Change the invalidate tracepoint to indicate how much data we're invalidating, and change the alloc tracepoints to indicate what offset they're for. 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 15 +++++++++++---- drivers/md/bcache/trace.c | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index c0d37d08244..a3e1427945f 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -162,10 +162,15 @@ static bool can_invalidate_bucket(struct cache *ca, struct bucket *b) static void invalidate_one_bucket(struct cache *ca, struct bucket *b) { + size_t bucket = b - ca->buckets; + + if (GC_SECTORS_USED(b)) + trace_bcache_invalidate(ca, bucket); + bch_inc_gen(ca, b); b->prio = INITIAL_PRIO; atomic_inc(&b->pin); - fifo_push(&ca->free_inc, b - ca->buckets); + fifo_push(&ca->free_inc, bucket); } /* @@ -301,8 +306,6 @@ static void invalidate_buckets(struct cache *ca) invalidate_buckets_random(ca); break; } - - trace_bcache_alloc_invalidate(ca); } #define allocator_wait(ca, cond) \ @@ -408,8 +411,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) fifo_pop(&ca->free[reserve], r)) goto out; - if (!wait) + if (!wait) { + trace_bcache_alloc_fail(ca, reserve); return -1; + } do { prepare_to_wait(&ca->set->bucket_wait, &w, @@ -425,6 +430,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) out: wake_up_process(ca->alloc_thread); + trace_bcache_alloc(ca, reserve); + if (expensive_debug_checks(ca->set)) { size_t iter; long i; diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index adbc3df17a8..b7820b0d262 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -45,7 +45,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_invalidate); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail); 
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); -- cgit v1.2.3-18-g5258 From 15754020524a56517df082799f07de880f4b29e2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 25 Feb 2014 17:34:21 -0800 Subject: bcache: Improve priority_stats Break down data into clean data/dirty data/metadata. Signed-off-by: Kent Overstreet --- drivers/md/bcache/sysfs.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index d8458d477a1..662b9484ed5 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -761,7 +761,9 @@ SHOW(__bch_cache) int cmp(const void *l, const void *r) { return *((uint16_t *) r) - *((uint16_t *) l); } - size_t n = ca->sb.nbuckets, i, unused, btree; + struct bucket *b; + size_t n = ca->sb.nbuckets, i; + size_t unused = 0, available = 0, dirty = 0, meta = 0; uint64_t sum = 0; /* Compute 31 quantiles */ uint16_t q[31], *p, *cached; @@ -772,6 +774,17 @@ SHOW(__bch_cache) return -ENOMEM; mutex_lock(&ca->set->bucket_lock); + for_each_bucket(b, ca) { + if (!GC_SECTORS_USED(b)) + unused++; + if (GC_MARK(b) == GC_MARK_RECLAIMABLE) + available++; + if (GC_MARK(b) == GC_MARK_DIRTY) + dirty++; + if (GC_MARK(b) == GC_MARK_METADATA) + meta++; + } + for (i = ca->sb.first_bucket; i < n; i++) p[i] = ca->buckets[i].prio; mutex_unlock(&ca->set->bucket_lock); @@ -786,10 +799,7 @@ SHOW(__bch_cache) while (cached < p + n && *cached == BTREE_PRIO) - cached++; - - btree = cached - p; - n -= btree; + cached++, n--; for (i = 0; i < n; i++) sum += INITIAL_PRIO - cached[i]; @@ -805,12 +815,16 @@ SHOW(__bch_cache) ret = scnprintf(buf, PAGE_SIZE, "Unused: %zu%%\n" + "Clean: %zu%%\n" + "Dirty: %zu%%\n" "Metadata: %zu%%\n" "Average: %llu\n" "Sectors per Q: %zu\n" "Quantiles: [", unused * 100 / (size_t) ca->sb.nbuckets, - btree * 100 / (size_t) ca->sb.nbuckets, sum, + available * 100 / (size_t) ca->sb.nbuckets, + dirty * 100 / (size_t) 
ca->sb.nbuckets, + meta * 100 / (size_t) ca->sb.nbuckets, sum, n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); for (i = 0; i < ARRAY_SIZE(q); i++) -- cgit v1.2.3-18-g5258 From c13f3af9247db929fe1be86c0442ef161e615ac4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Jan 2014 21:22:02 -0800 Subject: bcache: Add bch_keylist_init_single() This will potentially save us an allocation when we've got inode/dirent bkeys that don't fit in the keylist's inline keys. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bset.h | 6 ++++++ drivers/md/bcache/journal.c | 5 +---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 003260f4ddf..5f6728d5d4d 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l) l->top_p = l->keys_p = l->inline_keys; } +static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k) +{ + l->keys = k; + l->top = bkey_next(k); +} + static inline void bch_keylist_push(struct keylist *l) { l->top = bkey_next(l->top); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index cf8e0932aad..c8bfc28cd2b 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -313,8 +313,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) uint64_t start = i->j.last_seq, end = i->j.seq, n = start; struct keylist keylist; - bch_keylist_init(&keylist); - list_for_each_entry(i, list, list) { BUG_ON(i->pin && atomic_read(i->pin) != 1); @@ -327,8 +325,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) k = bkey_next(k)) { trace_bcache_journal_replay_key(k); - bkey_copy(keylist.top, k); - bch_keylist_push(&keylist); + bch_keylist_init_single(&keylist, k); ret = bch_btree_insert(s, &keylist, i->pin, NULL); if (ret) -- cgit v1.2.3-18-g5258 From 4fe6a816707aace9e8e297b708411c5930537793 Mon Sep 17 
00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2014 13:46:29 -0700 Subject: bcache: Add a real GC_MARK_RECLAIMABLE This means the garbage collection code can better check for data and metadata pointers to the same buckets. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 5 +++-- drivers/md/bcache/bcache.h | 6 +++--- drivers/md/bcache/btree.c | 18 ++++++++++++------ drivers/md/bcache/extents.c | 6 +++--- 4 files changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a3e1427945f..5ba4eaea57f 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -155,7 +155,8 @@ add: static bool can_invalidate_bucket(struct cache *ca, struct bucket *b) { - return GC_MARK(b) == GC_MARK_RECLAIMABLE && + return (!GC_MARK(b) || + GC_MARK(b) == GC_MARK_RECLAIMABLE) && !atomic_read(&b->pin) && can_inc_bucket_gen(b); } @@ -475,7 +476,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) for (i = 0; i < KEY_PTRS(k); i++) { struct bucket *b = PTR_BUCKET(c, k, i); - SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); bch_bucket_add_unused(PTR_CACHE(c, k, i), b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6d814f463d9..014236e411d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -207,9 +207,9 @@ struct bucket { */ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); -#define GC_MARK_RECLAIMABLE 0 -#define GC_MARK_DIRTY 1 -#define GC_MARK_METADATA 2 +#define GC_MARK_RECLAIMABLE 1 +#define GC_MARK_DIRTY 2 +#define GC_MARK_METADATA 3 #define GC_SECTORS_USED_SIZE 13 #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE)) BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ea5a59e2d74..1672db348c8 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1160,6 
+1160,8 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, SET_GC_MARK(g, GC_MARK_METADATA); else if (KEY_DIRTY(k)) SET_GC_MARK(g, GC_MARK_DIRTY); + else if (!GC_MARK(g)) + SET_GC_MARK(g, GC_MARK_RECLAIMABLE); /* guard against overflow */ SET_GC_SECTORS_USED(g, min_t(unsigned, @@ -1559,7 +1561,7 @@ static void btree_gc_start(struct cache_set *c) for_each_bucket(b, ca) { b->gc_gen = b->gen; if (!atomic_read(&b->pin)) { - SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); } } @@ -1622,12 +1624,16 @@ size_t bch_btree_gc_finish(struct cache_set *c) b->last_gc = b->gc_gen; c->need_gc = max(c->need_gc, bucket_gc_gen(b)); - if (!atomic_read(&b->pin) && - GC_MARK(b) == GC_MARK_RECLAIMABLE) { + if (atomic_read(&b->pin)) + continue; + + BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); + + if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) available++; - if (!GC_SECTORS_USED(b)) - bch_bucket_add_unused(ca, b); - } + + if (!GC_MARK(b)) + bch_bucket_add_unused(ca, b); } } diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 416d1a3e028..82d5e3288a6 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -499,9 +499,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, if (mutex_trylock(&b->c->bucket_lock)) { if (b->c->gc_mark_valid && - ((GC_MARK(g) != GC_MARK_DIRTY && - KEY_DIRTY(k)) || - GC_MARK(g) == GC_MARK_METADATA)) + (!GC_MARK(g) || + GC_MARK(g) == GC_MARK_METADATA || + (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k)))) goto err; if (g->prio == BTREE_PRIO) -- cgit v1.2.3-18-g5258 From 05335cff9f01555b769ac97b7bacc472b7ed047a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Mar 2014 18:22:34 -0700 Subject: bcache: Fix a race when freeing btree nodes This isn't a bulletproof fix; btree_node_free() -> bch_bucket_free() puts the bucket on the unused freelist, where it can be reused right away without any ordering requirements. 
It would be better to wait on at least a journal write to go down before reusing the bucket. bch_btree_set_root() does this, and inserting into non leaf nodes is completely synchronous so we should be ok, but future patches are just going to get rid of the unused freelist - it was needed in the past for various reasons but shouldn't be anymore. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 53 ++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 1672db348c8..e83732e2d91 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1006,8 +1006,6 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) static void btree_node_free(struct btree *b) { - unsigned i; - trace_bcache_btree_node_free(b); BUG_ON(b == b->c->root); @@ -1019,14 +1017,6 @@ static void btree_node_free(struct btree *b) cancel_delayed_work(&b->work); mutex_lock(&b->c->bucket_lock); - - for (i = 0; i < KEY_PTRS(&b->key); i++) { - BUG_ON(atomic_read(&PTR_BUCKET(b->c, &b->key, i)->pin)); - - bch_inc_gen(PTR_CACHE(b->c, &b->key, i), - PTR_BUCKET(b->c, &b->key, i)); - } - bch_bucket_free(b->c, &b->key); mca_bucket_free(b); mutex_unlock(&b->c->bucket_lock); @@ -1086,16 +1076,19 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k) { unsigned i; + mutex_lock(&b->c->bucket_lock); + + atomic_inc(&b->c->prio_blocked); + bkey_copy(k, &b->key); bkey_copy_key(k, &ZERO_KEY); - for (i = 0; i < KEY_PTRS(k); i++) { - uint8_t g = PTR_BUCKET(b->c, k, i)->gen + 1; - - SET_PTR_GEN(k, i, g); - } + for (i = 0; i < KEY_PTRS(k); i++) + SET_PTR_GEN(k, i, + bch_inc_gen(PTR_CACHE(b->c, &b->key, i), + PTR_BUCKET(b->c, &b->key, i))); - atomic_inc(&b->c->prio_blocked); + mutex_unlock(&b->c->bucket_lock); } static int btree_check_reserve(struct btree *b, struct btree_op *op) @@ -1342,6 +1335,13 @@ static int 
btree_gc_coalesce(struct btree *b, struct btree_op *op, bch_keylist_add(keylist, &new_nodes[i]->key); } + closure_sync(&cl); + + /* We emptied out this node */ + BUG_ON(btree_bset_first(new_nodes[0])->keys); + btree_node_free(new_nodes[0]); + rw_unlock(true, new_nodes[0]); + for (i = 0; i < nodes; i++) { if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key))) goto out_nocoalesce; @@ -1350,12 +1350,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, bch_keylist_push(keylist); } - /* We emptied out this node */ - BUG_ON(btree_bset_first(new_nodes[0])->keys); - btree_node_free(new_nodes[0]); - rw_unlock(true, new_nodes[0]); - - closure_sync(&cl); + bch_btree_insert_node(b, op, keylist, NULL, NULL); + BUG_ON(!bch_keylist_empty(keylist)); for (i = 0; i < nodes; i++) { btree_node_free(r[i].b); @@ -1364,9 +1360,6 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, r[i].b = new_nodes[i]; } - bch_btree_insert_node(b, op, keylist, NULL, NULL); - BUG_ON(!bch_keylist_empty(keylist)); - memmove(r, r + 1, sizeof(r[0]) * (nodes - 1)); r[nodes - 1].b = ERR_PTR(-EINTR); @@ -1456,12 +1449,11 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, keys.top); bch_keylist_push(&keys); - btree_node_free(last->b); - bch_btree_insert_node(b, op, &keys, NULL, NULL); BUG_ON(!bch_keylist_empty(&keys)); + btree_node_free(last->b); rw_unlock(true, last->b); last->b = n; @@ -1924,26 +1916,21 @@ static int btree_split(struct btree *b, struct btree_op *op, closure_sync(&cl); bch_btree_set_root(n3); rw_unlock(true, n3); - - btree_node_free(b); } else if (!b->parent) { /* Root filled up but didn't need to be split */ closure_sync(&cl); bch_btree_set_root(n1); - - btree_node_free(b); } else { /* Split a non root node */ closure_sync(&cl); make_btree_freeing_key(b, parent_keys.top); bch_keylist_push(&parent_keys); - btree_node_free(b); - bch_btree_insert_node(b->parent, op, &parent_keys, NULL, NULL); BUG_ON(!bch_keylist_empty(&parent_keys)); } + 
btree_node_free(b); rw_unlock(true, n1); bch_time_stats_update(&b->c->btree_split_time, start_time); -- cgit v1.2.3-18-g5258 From 2a285686c109816ba71a00b9278262cf02648258 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Mar 2014 16:42:42 -0800 Subject: bcache: btree locking rework Add a new lock, b->write_lock, which is required to actually modify - or write - a btree node; this lock is only held for short durations. This means we can write out a btree node without taking b->lock, which _is_ held for long durations - solving a deadlock when btree_flush_write() (from the journalling code) is called with a btree node locked. Right now this just occurs in bch_btree_set_root(), but with an upcoming journalling rework it is going to happen a lot more. This also turns b->lock into more of a read/intent lock instead of a read/write lock - but not completely, since it still blocks readers. May turn it into a real intent lock at some point in the future. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 164 ++++++++++++++++++++++++++++++++------------ drivers/md/bcache/btree.h | 3 + drivers/md/bcache/journal.c | 9 ++- drivers/md/bcache/super.c | 9 ++- 4 files changed, 133 insertions(+), 52 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e83732e2d91..01b1b7e23cf 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -167,6 +167,20 @@ static inline struct bset *write_block(struct btree *b) return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); } +static void bch_btree_init_next(struct btree *b) +{ + /* If not a leaf node, always sort */ + if (b->level && b->keys.nsets) + bch_btree_sort(&b->keys, &b->c->sort); + else + bch_btree_sort_lazy(&b->keys, &b->c->sort); + + if (b->written < btree_blocks(b)) + bch_bset_init_next(&b->keys, write_block(b), + bset_magic(&b->c->sb)); + +} + /* Btree key manipulation */ void bkey_put(struct cache_set *c, struct bkey *k) @@
-438,10 +452,12 @@ static void do_btree_node_write(struct btree *b) } } -void bch_btree_node_write(struct btree *b, struct closure *parent) +void __bch_btree_node_write(struct btree *b, struct closure *parent) { struct bset *i = btree_bset_last(b); + lockdep_assert_held(&b->write_lock); + trace_bcache_btree_write(b); BUG_ON(current->bio_list); @@ -465,23 +481,24 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); b->written += set_blocks(i, block_bytes(b->c)); +} - /* If not a leaf node, always sort */ - if (b->level && b->keys.nsets) - bch_btree_sort(&b->keys, &b->c->sort); - else - bch_btree_sort_lazy(&b->keys, &b->c->sort); +void bch_btree_node_write(struct btree *b, struct closure *parent) +{ + unsigned nsets = b->keys.nsets; + + lockdep_assert_held(&b->lock); + + __bch_btree_node_write(b, parent); /* * do verify if there was more than one set initially (i.e. we did a * sort) and we sorted down to a single set: */ - if (i != b->keys.set->data && !b->keys.nsets) + if (nsets && !b->keys.nsets) bch_btree_verify(b); - if (b->written < btree_blocks(b)) - bch_bset_init_next(&b->keys, write_block(b), - bset_magic(&b->c->sb)); + bch_btree_init_next(b); } static void bch_btree_node_write_sync(struct btree *b) @@ -489,7 +506,11 @@ static void bch_btree_node_write_sync(struct btree *b) struct closure cl; closure_init_stack(&cl); + + mutex_lock(&b->write_lock); bch_btree_node_write(b, &cl); + mutex_unlock(&b->write_lock); + closure_sync(&cl); } @@ -497,11 +518,10 @@ static void btree_node_write_work(struct work_struct *w) { struct btree *b = container_of(to_delayed_work(w), struct btree, work); - rw_lock(true, b, b->level); - + mutex_lock(&b->write_lock); if (btree_node_dirty(b)) - bch_btree_node_write(b, NULL); - rw_unlock(true, b); + __bch_btree_node_write(b, NULL); + mutex_unlock(&b->write_lock); } static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) @@ -509,6 +529,8 @@ static 
void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) struct bset *i = btree_bset_last(b); struct btree_write *w = btree_current_write(b); + lockdep_assert_held(&b->write_lock); + BUG_ON(!b->written); BUG_ON(!i->keys); @@ -593,6 +615,8 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, init_rwsem(&b->lock); lockdep_set_novalidate_class(&b->lock); + mutex_init(&b->write_lock); + lockdep_set_novalidate_class(&b->write_lock); INIT_LIST_HEAD(&b->list); INIT_DELAYED_WORK(&b->work, btree_node_write_work); b->c = c; @@ -626,8 +650,12 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush) up(&b->io_mutex); } + mutex_lock(&b->write_lock); if (btree_node_dirty(b)) - bch_btree_node_write_sync(b); + __bch_btree_node_write(b, &cl); + mutex_unlock(&b->write_lock); + + closure_sync(&cl); /* wait for any in flight btree write */ down(&b->io_mutex); @@ -1010,10 +1038,14 @@ static void btree_node_free(struct btree *b) BUG_ON(b == b->c->root); + mutex_lock(&b->write_lock); + if (btree_node_dirty(b)) btree_complete_write(b, btree_current_write(b)); clear_bit(BTREE_NODE_dirty, &b->flags); + mutex_unlock(&b->write_lock); + cancel_delayed_work(&b->work); mutex_lock(&b->c->bucket_lock); @@ -1065,8 +1097,10 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) { struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); if (!IS_ERR_OR_NULL(n)) { + mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); bkey_copy_key(&n->key, &b->key); + mutex_unlock(&n->write_lock); } return n; @@ -1269,6 +1303,9 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, goto out_nocoalesce; } + for (i = 0; i < nodes; i++) + mutex_lock(&new_nodes[i]->write_lock); + for (i = nodes - 1; i > 0; --i) { struct bset *n1 = btree_bset_first(new_nodes[i]); struct bset *n2 = btree_bset_first(new_nodes[i - 1]); @@ -1335,6 +1372,9 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, 
bch_keylist_add(keylist, &new_nodes[i]->key); } + for (i = 0; i < nodes; i++) + mutex_unlock(&new_nodes[i]->write_lock); + closure_sync(&cl); /* We emptied out this node */ @@ -1399,7 +1439,6 @@ static unsigned btree_gc_count_keys(struct btree *b) static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct closure *writes, struct gc_stat *gc) { - unsigned i; int ret = 0; bool should_rewrite; struct btree *n; @@ -1407,13 +1446,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct keylist keys; struct btree_iter iter; struct gc_merge_info r[GC_MERGE_NODES]; - struct gc_merge_info *last = r + GC_MERGE_NODES - 1; + struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; bch_keylist_init(&keys); bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); - for (i = 0; i < GC_MERGE_NODES; i++) - r[i].b = ERR_PTR(-EINTR); + for (i = r; i < r + ARRAY_SIZE(r); i++) + i->b = ERR_PTR(-EINTR); while (1) { k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); @@ -1443,6 +1482,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, if (!IS_ERR_OR_NULL(n)) { bch_btree_node_write_sync(n); + bch_keylist_add(&keys, &n->key); make_btree_freeing_key(last->b, @@ -1475,8 +1515,10 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, * Must flush leaf nodes before gc ends, since replace * operations aren't journalled */ + mutex_lock(&last->b->write_lock); if (btree_node_dirty(last->b)) bch_btree_node_write(last->b, writes); + mutex_unlock(&last->b->write_lock); rw_unlock(true, last->b); } @@ -1489,11 +1531,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, } } - for (i = 0; i < GC_MERGE_NODES; i++) - if (!IS_ERR_OR_NULL(r[i].b)) { - if (btree_node_dirty(r[i].b)) - bch_btree_node_write(r[i].b, writes); - rw_unlock(true, r[i].b); + for (i = r; i < r + ARRAY_SIZE(r); i++) + if (!IS_ERR_OR_NULL(i->b)) { + mutex_lock(&i->b->write_lock); + if (btree_node_dirty(i->b)) + bch_btree_node_write(i->b, writes); 
+ mutex_unlock(&i->b->write_lock); + rw_unlock(true, i->b); } bch_keylist_free(&keys); @@ -1514,6 +1558,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, if (!IS_ERR_OR_NULL(n)) { bch_btree_node_write_sync(n); + bch_btree_set_root(n); btree_node_free(b); rw_unlock(true, n); @@ -1871,6 +1916,9 @@ static int btree_split(struct btree *b, struct btree_op *op, goto err_free2; } + mutex_lock(&n1->write_lock); + mutex_lock(&n2->write_lock); + bch_btree_insert_keys(n1, op, insert_keys, replace_key); /* @@ -1897,21 +1945,26 @@ static int btree_split(struct btree *b, struct btree_op *op, bch_keylist_add(&parent_keys, &n2->key); bch_btree_node_write(n2, &cl); + mutex_unlock(&n2->write_lock); rw_unlock(true, n2); } else { trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys); + mutex_lock(&n1->write_lock); bch_btree_insert_keys(n1, op, insert_keys, replace_key); } bch_keylist_add(&parent_keys, &n1->key); bch_btree_node_write(n1, &cl); + mutex_unlock(&n1->write_lock); if (n3) { /* Depth increases, make a new root */ + mutex_lock(&n3->write_lock); bkey_copy_key(&n3->key, &MAX_KEY); bch_btree_insert_keys(n3, op, &parent_keys, NULL); bch_btree_node_write(n3, &cl); + mutex_unlock(&n3->write_lock); closure_sync(&cl); bch_btree_set_root(n3); @@ -1960,33 +2013,54 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op, atomic_t *journal_ref, struct bkey *replace_key) { + struct closure cl; + BUG_ON(b->level && replace_key); + closure_init_stack(&cl); + + mutex_lock(&b->write_lock); + + if (write_block(b) != btree_bset_last(b) && + b->keys.last_set_unwritten) + bch_btree_init_next(b); /* just wrote a set */ + if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) { - if (current->bio_list) { - op->lock = b->c->root->level + 1; - return -EAGAIN; - } else if (op->lock <= b->c->root->level) { - op->lock = b->c->root->level + 1; - return -EINTR; - } else { - /* Invalidated all iterators */ - int ret = btree_split(b, op, insert_keys, 
replace_key); + mutex_unlock(&b->write_lock); + goto split; + } - return bch_keylist_empty(insert_keys) ? - 0 : ret ?: -EINTR; - } - } else { - BUG_ON(write_block(b) != btree_bset_last(b)); + BUG_ON(write_block(b) != btree_bset_last(b)); - if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { - if (!b->level) - bch_btree_leaf_dirty(b, journal_ref); - else - bch_btree_node_write_sync(b); - } + if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { + if (!b->level) + bch_btree_leaf_dirty(b, journal_ref); + else + bch_btree_node_write(b, &cl); + } - return 0; + mutex_unlock(&b->write_lock); + + /* wait for btree node write if necessary, after unlock */ + closure_sync(&cl); + + return 0; +split: + if (current->bio_list) { + op->lock = b->c->root->level + 1; + return -EAGAIN; + } else if (op->lock <= b->c->root->level) { + op->lock = b->c->root->level + 1; + return -EINTR; + } else { + /* Invalidated all iterators */ + int ret = btree_split(b, op, insert_keys, replace_key); + + if (bch_keylist_empty(insert_keys)) + return 0; + else if (!ret) + return -EINTR; + return ret; } } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index def9dc4a822..acebf26809c 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -127,6 +127,8 @@ struct btree { struct cache_set *c; struct btree *parent; + struct mutex write_lock; + unsigned long flags; uint16_t written; /* would be nice to kill */ uint8_t level; @@ -236,6 +238,7 @@ static inline void rw_unlock(bool w, struct btree *b) } void bch_btree_node_read_done(struct btree *); +void __bch_btree_node_write(struct btree *, struct closure *); void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_set_root(struct btree *); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index c8bfc28cd2b..59e82021b5b 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -381,16 +381,15 @@ retry: b = best; if (b) { - rw_lock(true, b, 
b->level); - + mutex_lock(&b->write_lock); if (!btree_current_write(b)->journal) { - rw_unlock(true, b); + mutex_unlock(&b->write_lock); /* We raced */ goto retry; } - bch_btree_node_write(b, NULL); - rw_unlock(true, b); + __bch_btree_node_write(b, NULL); + mutex_unlock(&b->write_lock); } } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index ddfde380b49..9ded06434e1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1398,9 +1398,12 @@ static void cache_set_flush(struct closure *cl) list_add(&c->root->list, &c->btree_cache); /* Should skip this if we're unregistering because of an error */ - list_for_each_entry(b, &c->btree_cache, list) + list_for_each_entry(b, &c->btree_cache, list) { + mutex_lock(&b->write_lock); if (btree_node_dirty(b)) - bch_btree_node_write(b, NULL); + __bch_btree_node_write(b, NULL); + mutex_unlock(&b->write_lock); + } for_each_cache(ca, c, i) if (ca->alloc_thread) @@ -1667,8 +1670,10 @@ static void run_cache_set(struct cache_set *c) if (IS_ERR_OR_NULL(c->root)) goto err; + mutex_lock(&c->root->write_lock); bkey_copy_key(&c->root->key, &MAX_KEY); bch_btree_node_write(c->root, &cl); + mutex_unlock(&c->root->write_lock); bch_btree_set_root(c->root); rw_unlock(true, c->root); -- cgit v1.2.3-18-g5258 From 56b30770b27d54d68ad51eccc6d888282b568cee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Jan 2014 01:44:55 -0800 Subject: bcache: Kill btree_io_wq With the locking rework in the last patch, this shouldn't be needed anymore - btree_node_write_work() only takes b->write_lock which is never held for very long. 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 -- drivers/md/bcache/btree.c | 22 ++-------------------- drivers/md/bcache/super.c | 2 -- 3 files changed, 2 insertions(+), 24 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 014236e411d..15d26236caf 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -962,7 +962,5 @@ void bch_debug_exit(void); int bch_debug_init(struct kobject *); void bch_request_exit(void); int bch_request_init(void); -void bch_btree_exit(void); -int bch_btree_init(void); #endif /* _BCACHE_H */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 01b1b7e23cf..beb32551da7 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -93,8 +93,6 @@ #define PTR_HASH(c, k) \ (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0)) -static struct workqueue_struct *btree_io_wq; - #define insert_lock(s, b) ((b)->level <= (s)->lock) /* @@ -362,8 +360,7 @@ static void __btree_node_write_done(struct closure *cl) btree_complete_write(b, w); if (btree_node_dirty(b)) - queue_delayed_work(btree_io_wq, &b->work, - msecs_to_jiffies(30000)); + schedule_delayed_work(&b->work, 30 * HZ); closure_return_with_destructor(cl, btree_node_write_unlock); } @@ -535,7 +532,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) BUG_ON(!i->keys); if (!btree_node_dirty(b)) - queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); + schedule_delayed_work(&b->work, 30 * HZ); set_btree_node_dirty(b); @@ -2446,18 +2443,3 @@ void bch_keybuf_init(struct keybuf *buf) spin_lock_init(&buf->lock); array_allocator_init(&buf->freelist); } - -void bch_btree_exit(void) -{ - if (btree_io_wq) - destroy_workqueue(btree_io_wq); -} - -int __init bch_btree_init(void) -{ - btree_io_wq = create_singlethread_workqueue("bch_btree_io"); - if (!btree_io_wq) - return -ENOMEM; - - return 0; -} diff --git a/drivers/md/bcache/super.