Diffstat (limited to 'drivers/md/dm-cache-policy-mq.c')
-rw-r--r--	drivers/md/dm-cache-policy-mq.c	756
1 file changed, 446 insertions, 310 deletions
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 4296155090b..0e385e40909 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min)
 
 /*----------------------------------------------------------------*/
 
-static unsigned long *alloc_bitset(unsigned nr_entries)
-{
-	size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
-	return vzalloc(s);
-}
-
-static void free_bitset(unsigned long *bits)
-{
-	vfree(bits);
-}
-
-/*----------------------------------------------------------------*/
-
 /*
  * Large, sequential ios are probably better left on the origin device since
  * spindles tend to have good bandwidth.
@@ -85,7 +72,7 @@ static enum io_pattern iot_pattern(struct io_tracker *t)
 
 static void iot_update_stats(struct io_tracker *t, struct bio *bio)
 {
-	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
+	if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
 		t->nr_seq_samples++;
 	else {
 		/*
@@ -100,7 +87,7 @@ static void iot_update_stats(struct io_tracker *t, struct bio *bio)
 		t->nr_rand_samples++;
 	}
 
-	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
+	t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
 }
 
 static void iot_check_for_pattern_switch(struct io_tracker *t)
@@ -151,6 +138,21 @@ static void queue_init(struct queue *q)
 }
 
 /*
+ * Checks to see if the queue is empty.
+ * FIXME: reduce cpu usage.
+ */
+static bool queue_empty(struct queue *q)
+{
+	unsigned i;
+
+	for (i = 0; i < NR_QUEUE_LEVELS; i++)
+		if (!list_empty(q->qs + i))
+			return false;
+
+	return true;
+}
+
+/*
  * Insert an entry to the back of the given level.
  */
 static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
@@ -218,17 +220,115 @@ struct entry {
 	struct hlist_node hlist;
 	struct list_head list;
 	dm_oblock_t oblock;
-	dm_cblock_t cblock;	/* valid iff in_cache */
 
 	/*
 	 * FIXME: pack these better
 	 */
-	bool in_cache:1;
+	bool dirty:1;
 	unsigned hit_count;
 	unsigned generation;
 	unsigned tick;
 };
 
+/*
+ * Rather than storing the cblock in an entry, we allocate all entries in
+ * an array, and infer the cblock from the entry position.
+ *
+ * Free entries are linked together into a list.
+ */
+struct entry_pool {
+	struct entry *entries, *entries_end;
+	struct list_head free;
+	unsigned nr_allocated;
+};
+
+static int epool_init(struct entry_pool *ep, unsigned nr_entries)
+{
+	unsigned i;
+
+	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
+	if (!ep->entries)
+		return -ENOMEM;
+
+	ep->entries_end = ep->entries + nr_entries;
+
+	INIT_LIST_HEAD(&ep->free);
+	for (i = 0; i < nr_entries; i++)
+		list_add(&ep->entries[i].list, &ep->free);
+
+	ep->nr_allocated = 0;
+
+	return 0;
+}
+
+static void epool_exit(struct entry_pool *ep)
+{
+	vfree(ep->entries);
+}
+
+static struct entry *alloc_entry(struct entry_pool *ep)
+{
+	struct entry *e;
+
+	if (list_empty(&ep->free))
+		return NULL;
+
+	e = list_entry(list_pop(&ep->free), struct entry, list);
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+/*
+ * This assumes the cblock hasn't already been allocated.
+ */
+static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
+{
+	struct entry *e = ep->entries + from_cblock(cblock);
+
+	list_del_init(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+static void free_entry(struct entry_pool *ep, struct entry *e)
+{
+	BUG_ON(!ep->nr_allocated);
+	ep->nr_allocated--;
+	INIT_HLIST_NODE(&e->hlist);
+	list_add(&e->list, &ep->free);
+}
+
+/*
+ * Returns NULL if the entry is free.
+ */
+static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock)
+{
+	struct entry *e = ep->entries + from_cblock(cblock);
+	return !hlist_unhashed(&e->hlist) ? e : NULL;
+}
+
+static bool epool_empty(struct entry_pool *ep)
+{
+	return list_empty(&ep->free);
+}
+
+static bool in_pool(struct entry_pool *ep, struct entry *e)
+{
+	return e >= ep->entries && e < ep->entries_end;
+}
+
+static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
+{
+	return to_cblock(e - ep->entries);
+}
+
+/*----------------------------------------------------------------*/
+
 struct mq_policy {
 	struct dm_cache_policy policy;
@@ -238,13 +338,22 @@ struct mq_policy {
 	struct io_tracker tracker;
 
 	/*
-	 * We maintain two queues of entries.  The cache proper contains
-	 * the currently active mappings.  Whereas the pre_cache tracks
-	 * blocks that are being hit frequently and potential candidates
-	 * for promotion to the cache.
+	 * Entries come from two pools, one of pre-cache entries, and one
+	 * for the cache proper.
+	 */
+	struct entry_pool pre_cache_pool;
+	struct entry_pool cache_pool;
+
+	/*
+	 * We maintain three queues of entries.  The cache proper,
+	 * consisting of a clean and dirty queue, contains the currently
+	 * active mappings.  Whereas the pre_cache tracks blocks that
+	 * are being hit frequently and potential candidates for promotion
+	 * to the cache.
 	 */
 	struct queue pre_cache;
-	struct queue cache;
+	struct queue cache_clean;
+	struct queue cache_dirty;
 
 	/*
 	 * Keeps track of time, incremented by the core.  We use this to
@@ -281,24 +390,9 @@ struct mq_policy {
 	 */
 	unsigned promote_threshold;
 
-	/*
-	 * We need cache_size entries for the cache, and choose to have
-	 * cache_size entries for the pre_cache too.  One motivation for
-	 * using the same size is to make the hit counts directly
-	 * comparable between pre_cache and cache.
-	 */
-	unsigned nr_entries;
-	unsigned nr_entries_allocated;
-	struct list_head free;
-
-	/*
-	 * Cache blocks may be unallocated.  We store this info in a
-	 * bitset.
-	 */
-	unsigned long *allocation_bitset;
-	unsigned nr_cblocks_allocated;
-	unsigned find_free_nr_words;
-	unsigned find_free_last_word;
+	unsigned discard_promote_adjustment;
+	unsigned read_promote_adjustment;
+	unsigned write_promote_adjustment;
 
 	/*
 	 * The hash table allows us to quickly find an entry by origin
@@ -309,48 +403,9 @@
 	struct hlist_head *table;
 };
 
-/*----------------------------------------------------------------*/
-/* Free/alloc mq cache entry structures. */
-static void takeout_queue(struct list_head *lh, struct queue *q)
-{
-	unsigned level;
-
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_splice(q->qs + level, lh);
-}
-
-static void free_entries(struct mq_policy *mq)
-{
-	struct entry *e, *tmp;
-
-	takeout_queue(&mq->free, &mq->pre_cache);
-	takeout_queue(&mq->free, &mq->cache);
-
-	list_for_each_entry_safe(e, tmp, &mq->free, list)
-		kmem_cache_free(mq_entry_cache, e);
-}
-
-static int alloc_entries(struct mq_policy *mq, unsigned elts)
-{
-	unsigned u = mq->nr_entries;
-
-	INIT_LIST_HEAD(&mq->free);
-	mq->nr_entries_allocated = 0;
-
-	while (u--) {
-		struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
-
-		if (!e) {
-			free_entries(mq);
-			return -ENOMEM;
-		}
-
-
-		list_add(&e->list, &mq->free);
-	}
-
-	return 0;
-}
+#define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1
+#define DEFAULT_READ_PROMOTE_ADJUSTMENT 4
+#define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8
 
 /*----------------------------------------------------------------*/
@@ -388,96 +443,14 @@ static void hash_remove(struct entry *e)
 
 /*----------------------------------------------------------------*/
 
-/*
- * Allocates a new entry structure.  The memory is allocated in one lump,
- * so we just handing it out here.  Returns NULL if all entries have
- * already been allocated.  Cannot fail otherwise.
- */
-static struct entry *alloc_entry(struct mq_policy *mq)
-{
-	struct entry *e;
-
-	if (mq->nr_entries_allocated >= mq->nr_entries) {
-		BUG_ON(!list_empty(&mq->free));
-		return NULL;
-	}
-
-	e = list_entry(list_pop(&mq->free), struct entry, list);
-	INIT_LIST_HEAD(&e->list);
-	INIT_HLIST_NODE(&e->hlist);
-
-	mq->nr_entries_allocated++;
-	return e;
-}
-
-/*----------------------------------------------------------------*/
-
-/*
- * Mark cache blocks allocated or not in the bitset.
- */
-static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	set_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated++;
-}
-
-static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	clear_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated--;
-}
-
 static bool any_free_cblocks(struct mq_policy *mq)
 {
-	return mq->nr_cblocks_allocated < from_cblock(mq->cache_size);
+	return !epool_empty(&mq->cache_pool);
 }
 
-/*
- * Fills result out with a cache block that isn't in use, or return
- * -ENOSPC.  This does _not_ mark the cblock as allocated, the caller is
- * reponsible for that.
- */
-static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
-			      dm_cblock_t *result, unsigned *last_word)
+static bool any_clean_cblocks(struct mq_policy *mq)
 {
-	int r = -ENOSPC;
-	unsigned w;
-
-	for (w = begin; w < end; w++) {
-		/*
-		 * ffz is undefined if no zero exists
-		 */
-		if (mq->allocation_bitset[w] != ~0UL) {
-			*last_word = w;
-			*result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
-			if (from_cblock(*result) < from_cblock(mq->cache_size))
-				r = 0;
-
-			break;
-		}
-	}
-
-	return r;
-}
-
-static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result)
-{
-	int r;
-
-	if (!any_free_cblocks(mq))
-		return -ENOSPC;
-
-	r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
-	if (r == -ENOSPC && mq->find_free_last_word)
-		r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
-
-	return r;
+	return !queue_empty(&mq->cache_clean);
 }
 
 /*----------------------------------------------------------------*/
@@ -496,33 +469,35 @@ static unsigned queue_level(struct entry *e)
 	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
 }
 
+static bool in_cache(struct mq_policy *mq, struct entry *e)
+{
+	return in_pool(&mq->cache_pool, e);
+}
+
 /*
  * Inserts the entry into the pre_cache or the cache.  Ensures the cache
- * block is marked as allocated if necc.  Inserts into the hash table.  Sets the
- * tick which records when the entry was last moved about.
+ * block is marked as allocated if necc.  Inserts into the hash table.
+ * Sets the tick which records when the entry was last moved about.
  */
 static void push(struct mq_policy *mq, struct entry *e)
 {
 	e->tick = mq->tick;
 	hash_insert(mq, e);
 
-	if (e->in_cache) {
-		alloc_cblock(mq, e->cblock);
-		queue_push(&mq->cache, queue_level(e), &e->list);
-	} else
+	if (in_cache(mq, e))
+		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
+			   queue_level(e), &e->list);
+	else
 		queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
 
 /*
  * Removes an entry from pre_cache or cache.  Removes from the hash table.
- * Frees off the cache block if necc.
  */
 static void del(struct mq_policy *mq, struct entry *e)
 {
 	queue_remove(&e->list);
 	hash_remove(e);
-
-	if (e->in_cache)
-		free_cblock(mq, e->cblock);
 }
 
 /*
@@ -531,14 +506,14 @@ static void del(struct mq_policy *mq, struct entry *e)
  */
 static struct entry *pop(struct mq_policy *mq, struct queue *q)
 {
-	struct entry *e = container_of(queue_pop(q), struct entry, list);
+	struct entry *e;
+	struct list_head *h = queue_pop(q);
 
-	if (e) {
-		hash_remove(e);
+	if (!h)
+		return NULL;
 
-		if (e->in_cache)
-			free_cblock(mq, e->cblock);
-	}
+	e = container_of(h, struct entry, list);
+	hash_remove(e);
 
 	return e;
 }
@@ -556,7 +531,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
  * of the entries.
  *
  * At the moment the threshold is taken by averaging the hit counts of some
- * of the entries in the cache (the first 20 entries of the first level).
+ * of the entries in the cache (the first 20 entries across all levels in
+ * ascending order, giving preference to the clean entries at each level).
  *
  * We can be much cleverer than this though.  For example, each promotion
 * could bump up the threshold helping to prevent churn.  Much more to do
@@ -571,14 +547,21 @@ static void check_generation(struct mq_policy *mq)
 	struct list_head *head;
 	struct entry *e;
 
-	if ((mq->hit_count >= mq->generation_period) &&
-	    (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
-
+	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
 		mq->hit_count = 0;
 		mq->generation++;
 
 		for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
-			head = mq->cache.qs + level;
+			head = mq->cache_clean.qs + level;
+			list_for_each_entry(e, head, list) {
+				nr++;
+				total += e->hit_count;
+
+				if (++count >= MAX_TO_AVERAGE)
+					break;
+			}
+
+			head = mq->cache_dirty.qs + level;
 			list_for_each_entry(e, head, list) {
 				nr++;
 				total += e->hit_count;
@@ -631,19 +614,30 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
 {
-	dm_cblock_t result;
-	struct entry *demoted = pop(mq, &mq->cache);
+	struct entry *demoted = pop(mq, &mq->cache_clean);
+
+	if (!demoted)
+		/*
+		 * We could get a block from mq->cache_dirty, but that
+		 * would add extra latency to the triggering bio as it
+		 * waits for the writeback.  Better to not promote this
+		 * time and hope there's a clean block next time this block
+		 * is hit.
+		 */
+		return -ENOSPC;
 
-	BUG_ON(!demoted);
-	result = demoted->cblock;
 	*oblock = demoted->oblock;
-	demoted->in_cache = false;
-	demoted->hit_count = 1;
-	push(mq, demoted);
+	free_entry(&mq->cache_pool, demoted);
 
-	return result;
+	/*
+	 * We used to put the demoted block into the pre-cache, but I think
+	 * it's simpler to just let it work it's way up from zero again.
+	 * Stops blocks flickering in and out of the cache.
+	 */
+
+	return 0;
 }
 
 /*
@@ -655,24 +649,21 @@ static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
  * We bias towards reads, since they can be demoted at no cost if they
  * haven't been dirtied.
  */
-#define DISCARDED_PROMOTE_THRESHOLD 1
-#define READ_PROMOTE_THRESHOLD 4
-#define WRITE_PROMOTE_THRESHOLD 8
-
 static unsigned adjusted_promote_threshold(struct mq_policy *mq,
 					   bool discarded_oblock, int data_dir)
 {
-	if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE)
+	if (data_dir == READ)
+		return mq->promote_threshold + mq->read_promote_adjustment;
+
+	if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) {
 		/*
 		 * We don't need to do any copying at all, so give this a
-		 * very low threshold.  In practice this only triggers
-		 * during initial population after a format.
+		 * very low threshold.
 		 */
-		return DISCARDED_PROMOTE_THRESHOLD;
+		return mq->discard_promote_adjustment;
+	}
 
-	return data_dir == READ ?
-		(mq->promote_threshold + READ_PROMOTE_THRESHOLD) :
-		(mq->promote_threshold + WRITE_PROMOTE_THRESHOLD);
+	return mq->promote_threshold + mq->write_promote_adjustment;
 }
 
 static bool should_promote(struct mq_policy *mq, struct entry *e,
@@ -688,34 +679,49 @@ static int cache_entry_found(struct mq_policy *mq,
 {
 	requeue_and_update_tick(mq, e);
 
-	if (e->in_cache) {
+	if (in_cache(mq, e)) {
 		result->op = POLICY_HIT;
-		result->cblock = e->cblock;
+		result->cblock = infer_cblock(&mq->cache_pool, e);
 	}
 
 	return 0;
 }
 
 /*
- * Moves and entry from the pre_cache to the cache.  The main work is
+ * Moves an entry from the pre_cache to the cache.  The main work is
  * finding which cache block to use.
 */
 static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 			      struct policy_result *result)
 {
-	dm_cblock_t cblock;
+	int r;
+	struct entry *new_e;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
+	/* Ensure there's a free cblock in the cache */
+	if (epool_empty(&mq->cache_pool)) {
 		result->op = POLICY_REPLACE;
-		cblock = demote_cblock(mq, &result->old_oblock);
+		r = demote_cblock(mq, &result->old_oblock);
+		if (r) {
+			result->op = POLICY_MISS;
+			return 0;
+		}
 	} else
 		result->op = POLICY_NEW;
 
-	result->cblock = e->cblock = cblock;
+	new_e = alloc_entry(&mq->cache_pool);
+	BUG_ON(!new_e);
+
+	new_e->oblock = e->oblock;
+	new_e->dirty = false;
+	new_e->hit_count = e->hit_count;
+	new_e->generation = e->generation;
+	new_e->tick = e->tick;
 
 	del(mq, e);
-	e->in_cache = true;
-	push(mq, e);
+	free_entry(&mq->pre_cache_pool, e);
+	push(mq, new_e);
+
+	result->cblock = infer_cblock(&mq->cache_pool, new_e);
 
 	return 0;
 }
@@ -727,15 +733,18 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
 	int r = 0;
 	bool updated = updated_this_tick(mq, e);
 
-	requeue_and_update_tick(mq, e);
-
 	if ((!discarded_oblock && updated) ||
-	    !should_promote(mq, e, discarded_oblock, data_dir))
+	    !should_promote(mq, e, discarded_oblock, data_dir)) {
+		requeue_and_update_tick(mq, e);
 		result->op = POLICY_MISS;
-	else if (!can_migrate)
+
+	} else if (!can_migrate)
 		r = -EWOULDBLOCK;
-	else
+
+	else {
+		requeue_and_update_tick(mq, e);
 		r = pre_cache_to_cache(mq, e, result);
+	}
 
 	return r;
 }
@@ -743,7 +752,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
 static void insert_in_pre_cache(struct mq_policy *mq,
 				dm_oblock_t oblock)
 {
-	struct entry *e = alloc_entry(mq);
+	struct entry *e = alloc_entry(&mq->pre_cache_pool);
 
 	if (!e)
 		/*
@@ -757,7 +766,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 		return;
 	}
 
-	e->in_cache = false;
+	e->dirty = false;
 	e->oblock = oblock;
 	e->hit_count = 1;
 	e->generation = mq->generation;
@@ -767,37 +776,43 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 			    struct policy_result *result)
 {
+	int r;
 	struct entry *e;
-	dm_cblock_t cblock;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
-		result->op = POLICY_MISS;
-		insert_in_pre_cache(mq, oblock);
-		return;
-	}
+	if (epool_empty(&mq->cache_pool)) {
+		result->op = POLICY_REPLACE;
+		r = demote_cblock(mq, &result->old_oblock);
+		if (unlikely(r)) {
+			result->op = POLICY_MISS;
+			insert_in_pre_cache(mq, oblock);
+			return;
+		}
 
-	e = alloc_entry(mq);
-	if (unlikely(!e)) {
-		result->op = POLICY_MISS;
-		return;
+		/*
+		 * This will always succeed, since we've just demoted.
+		 */
+		e = alloc_entry(&mq->cache_pool);
+		BUG_ON(!e);
+
+	} else {
+		e = alloc_entry(&mq->cache_pool);
+		result->op = POLICY_NEW;
 	}
 
 	e->oblock = oblock;
-	e->cblock = cblock;
-	e->in_cache = true;
+	e->dirty = false;
 	e->hit_count = 1;
 	e->generation = mq->generation;
 	push(mq, e);
 
-	result->op = POLICY_NEW;
-	result->cblock = e->cblock;
+	result->cblock = infer_cblock(&mq->cache_pool, e);
 }
 
 static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
 			  bool can_migrate, bool discarded_oblock,
 			  int data_dir, struct policy_result *result)
 {
-	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
+	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
 		if (can_migrate)
 			insert_in_cache(mq, oblock, result);
 		else
@@ -821,13 +836,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
 	int r = 0;
 	struct entry *e = hash_lookup(mq, oblock);
 
-	if (e && e->in_cache)
+	if (e && in_cache(mq, e))
 		r = cache_entry_found(mq, e, result);
+
 	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
 		result->op = POLICY_MISS;
+
 	else if (e)
 		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
 					  data_dir, result);
+
 	else
 		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
 				   data_dir, result);
@@ -854,9 +872,9 @@ static void mq_destroy(struct dm_cache_policy *p)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 
-	free_bitset(mq->allocation_bitset);
-	kfree(mq->table);
-	free_entries(mq);
+	vfree(mq->table);
+	epool_exit(&mq->cache_pool);
+	epool_exit(&mq->pre_cache_pool);
 	kfree(mq);
 }
 
@@ -904,8 +922,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 		return -EWOULDBLOCK;
 
 	e = hash_lookup(mq, oblock);
-	if (e && e->in_cache) {
-		*cblock = e->cblock;
+	if (e && in_cache(mq, e)) {
+		*cblock = infer_cblock(&mq->cache_pool, e);
 		r = 0;
 	} else
 		r = -ENOENT;
@@ -915,6 +933,36 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 	return r;
 }
 
+static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
+{
+	struct entry *e;
+
+	e = hash_lookup(mq, oblock);
+	BUG_ON(!e || !in_cache(mq, e));
+
+	del(mq, e);
+	e->dirty = set;
+	push(mq, e);
+}
+
+static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, true);
+	mutex_unlock(&mq->lock);
+}
+
+static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, false);
+	mutex_unlock(&mq->lock);
+}
+
 static int mq_load_mapping(struct dm_cache_policy *p,
 			   dm_oblock_t oblock, dm_cblock_t cblock,
 			   uint32_t hint, bool hint_valid)
@@ -922,13 +970,9 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	e = alloc_entry(mq);
-	if (!e)
-		return -ENOMEM;
-
-	e->cblock = cblock;
+	e = alloc_particular_entry(&mq->cache_pool, cblock);
 	e->oblock = oblock;
-	e->in_cache = true;
+	e->dirty = false;	/* this gets corrected in a minute */
 	e->hit_count = hint_valid ? hint : 1;
 	e->generation = mq->generation;
 	push(mq, e);
@@ -936,57 +980,126 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	return 0;
 }
 
+static int mq_save_hints(struct mq_policy *mq, struct queue *q,
+			 policy_walk_fn fn, void *context)
+{
+	int r;
+	unsigned level;
+	struct entry *e;
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, q->qs + level, list) {
+			r = fn(context, infer_cblock(&mq->cache_pool, e),
+			       e->oblock, e->hit_count);
+			if (r)
+				return r;
+		}
+
+	return 0;
+}
+
 static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 			    void *context)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 	int r = 0;
-	struct entry *e;
-	unsigned level;
 
 	mutex_lock(&mq->lock);
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache.qs[level], list) {
-			r = fn(context, e->cblock, e->oblock, e->hit_count);
-			if (r)
-				goto out;
-		}
+
+	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
+	if (!r)
+		r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
 
-out:
 	mutex_unlock(&mq->lock);
 
 	return r;
 }
 
+static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
+{
+	struct entry *e;
+
+	e = hash_lookup(mq, oblock);
+	BUG_ON(!e || !in_cache(mq, e));
+
+	del(mq, e);
+	free_entry(&mq->cache_pool, e);
+}
+
 static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
 	struct mq_policy *mq = to_mq_policy(p);
-	struct entry *e;
 
 	mutex_lock(&mq->lock);
+	__remove_mapping(mq, oblock);
+	mutex_unlock(&mq->lock);
+}
 
-	e = hash_lookup(mq, oblock);
+static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
+{
+	struct entry *e = epool_find(&mq->cache_pool, cblock);
 
-	BUG_ON(!e || !e->in_cache);
+	if (!e)
+		return -ENODATA;
 
 	del(mq, e);
-	e->in_cache = false;
-	push(mq, e);
+	free_entry(&mq->cache_pool, e);
+
+	return 0;
+}
+
+static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
 
+	mutex_lock(&mq->lock);
+	r = __remove_cblock(mq, cblock);
 	mutex_unlock(&mq->lock);
+
+	return r;
 }
 
-static void force_mapping(struct mq_policy *mq,
-			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
			      dm_cblock_t *cblock)
 {
-	struct entry *e = hash_lookup(mq, current_oblock);
+	struct entry *e = pop(mq, &mq->cache_dirty);
 
-	BUG_ON(!e || !e->in_cache);
+	if (!e)
+		return -ENODATA;
 
-	del(mq, e);
-	e->oblock = new_oblock;
+	*oblock = e->oblock;
+	*cblock = infer_cblock(&mq->cache_pool, e);
+	e->dirty = false;
 	push(mq, e);
+
+	return 0;
+}
+
+static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
+			     dm_cblock_t *cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	r = __mq_writeback_work(mq, oblock, cblock);
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
+static void __force_mapping(struct mq_policy *mq,
+			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+{
+	struct entry *e = hash_lookup(mq, current_oblock);
+
+	if (e && in_cache(mq, e)) {
+		del(mq, e);
+		e->oblock = new_oblock;
+		e->dirty = true;
+		push(mq, e);
+	}
 }
 
 static void mq_force_mapping(struct dm_cache_policy *p,
@@ -995,16 +1108,20 @@ static void mq_force_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 
 	mutex_lock(&mq->lock);
-	force_mapping(mq, current_oblock, new_oblock);
+	__force_mapping(mq, current_oblock, new_oblock);
 	mutex_unlock(&mq->lock);
 }
 
 static dm_cblock_t mq_residency(struct dm_cache_policy *p)
 {
+	dm_cblock_t r;
 	struct mq_policy *mq = to_mq_policy(p);
 
-	/* FIXME: lock mutex, not sure we can block here */
-	return to_cblock(mq->nr_cblocks_allocated);
+	mutex_lock(&mq->lock);
+	r = to_cblock(mq->cache_pool.nr_allocated);
+	mutex_unlock(&mq->lock);
+
+	return r;
 }
 
 static void mq_tick(struct dm_cache_policy *p)
@@ -1021,20 +1138,28 @@ static int mq_set_config_value(struct dm_cache_policy *p,
 			       const char *key, const char *value)
 {
 	struct mq_policy *mq = to_mq_policy(p);
-	enum io_pattern pattern;
 	unsigned long tmp;
 
-	if (!strcasecmp(key, "random_threshold"))
-		pattern = PATTERN_RANDOM;
-	else if (!strcasecmp(key, "sequential_threshold"))
-		pattern = PATTERN_SEQUENTIAL;
-	else
-		return -EINVAL;
-
 	if (kstrtoul(value, 10, &tmp))
 		return -EINVAL;
 
-	mq->tracker.thresholds[pattern] = tmp;
+	if (!strcasecmp(key, "random_threshold")) {
+		mq->tracker.thresholds[PATTERN_RANDOM] = tmp;
+
+	} else if (!strcasecmp(key, "sequential_threshold")) {
+		mq->tracker.thresholds[PATTERN_SEQUENTIAL] = tmp;
+
+	} else if (!strcasecmp(key, "discard_promote_adjustment"))
+		mq->discard_promote_adjustment = tmp;
+
+	else if (!strcasecmp(key, "read_promote_adjustment"))
+		mq->read_promote_adjustment = tmp;
+
+	else if (!strcasecmp(key, "write_promote_adjustment"))
+		mq->write_promote_adjustment = tmp;
+
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -1044,9 +1169,16 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsign
 	ssize_t sz = 0;
 	struct mq_policy *mq = to_mq_policy(p);
 
-	DMEMIT("4 random_threshold %u sequential_threshold %u",
+	DMEMIT("10 random_threshold %u "
+	       "sequential_threshold %u "
+	       "discard_promote_adjustment %u "
+	       "read_promote_adjustment %u "
+	       "write_promote_adjustment %u",
 	       mq->tracker.thresholds[PATTERN_RANDOM],
-	       mq->tracker.thresholds[PATTERN_SEQUENTIAL]);
+	       mq->tracker.thresholds[PATTERN_SEQUENTIAL],
+	       mq->discard_promote_adjustment,
+	       mq->read_promote_adjustment,
+	       mq->write_promote_adjustment);
 
 	return 0;
 }
@@ -1057,10 +1189,13 @@ static void init_policy_functions(struct mq_policy *mq)
 	mq->policy.destroy = mq_destroy;
 	mq->policy.map = mq_map;
 	mq->policy.lookup = mq_lookup;
+	mq->policy.set_dirty = mq_set_dirty;
+	mq->policy.clear_dirty = mq_clear_dirty;
 	mq->policy.load_mapping = mq_load_mapping;
 	mq->policy.walk_mappings = mq_walk_mappings;
 	mq->policy.remove_mapping = mq_remove_mapping;
-	mq->policy.writeback_work = NULL;
+	mq->policy.remove_cblock = mq_remove_cblock;
+	mq->policy.writeback_work = mq_writeback_work;
 	mq->policy.force_mapping = mq_force_mapping;
 	mq->policy.residency = mq_residency;
 	mq->policy.tick = mq_tick;
@@ -1072,7 +1207,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 					 sector_t origin_size,
 					 sector_t cache_block_size)
 {
-	int r;
 	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
 
 	if (!mq)
@@ -1080,47 +1214,48 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	init_policy_functions(mq);
 	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
-
 	mq->cache_size = cache_size;
+
+	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of pre-cache entries");
+		goto bad_pre_cache_init;
+	}
+
+	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of cache entries");
+		goto bad_cache_init;
+	}
+
 	mq->tick_protected = 0;
 	mq->tick = 0;
 	mq->hit_count = 0;
 	mq->generation = 0;
 	mq->promote_threshold = 0;
+	mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT;
+	mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT;
+	mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;
 	mutex_init(&mq->lock);
 	spin_lock_init(&mq->tick_lock);
-	mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
-	mq->find_free_last_word = 0;
 
 	queue_init(&mq->pre_cache);
-	queue_init(&mq->cache);
-	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
-
-	mq->nr_entries = 2 * from_cblock(cache_size);
-	r = alloc_entries(mq, mq->nr_entries);
-	if (r)
-		goto bad_cache_alloc;
+	queue_init(&mq->cache_clean);
+	queue_init(&mq->cache_dirty);
 
-	mq->nr_entries_allocated = 0;
-	mq->nr_cblocks_allocated = 0;
+	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
 	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
 	mq->hash_bits = ffs(mq->nr_buckets) - 1;
-	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
+	mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets);
 	if (!mq->table)
 		goto bad_alloc_table;
 
-	mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
-	if (!mq->allocation_bitset)
-		goto bad_alloc_bitset;
-
 	return &mq->policy;
 
-bad_alloc_bitset:
-	kfree(mq->table);
 bad_alloc_table:
-	free_entries(mq);
-bad_cache_alloc:
+	epool_exit(&mq->cache_pool);
+bad_cache_init:
+	epool_exit(&mq->pre_cache_pool);
+bad_pre_cache_init:
 	kfree(mq);
 
 	return NULL;
@@ -1130,7 +1265,7 @@ bad_cache_alloc:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
-	.version = {1, 0, 0},
+	.version = {1, 2, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1138,10 +1273,11 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
-	.version = {1, 0, 0},
+	.version = {1, 2, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
-	.create = mq_create
+	.create = mq_create,
+	.real = &mq_policy_type
 };
 
 static int __init mq_init(void)
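The central data-structure change above replaces the allocation bitset and the per-entry cblock field with preallocated entry pools, where an entry's cblock is simply its index in the pool's backing array. Below is a standalone userspace sketch of that entry-pool idea — this is not the kernel code: the kernel uses vzalloc() and list_head free lists, while this sketch substitutes a plain singly linked free list, and the names merely mirror the patch.

#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next_free;	/* stands in for the kernel's list_head */
	unsigned hit_count;
};

struct entry_pool {
	struct entry *entries, *entries_end;
	struct entry *free;		/* head of the free list */
	unsigned nr_allocated;
};

static int epool_init(struct entry_pool *ep, unsigned nr_entries)
{
	unsigned i;

	ep->entries = calloc(nr_entries, sizeof(struct entry));
	if (!ep->entries)
		return -1;

	ep->entries_end = ep->entries + nr_entries;
	ep->free = NULL;
	for (i = nr_entries; i--; ) {	/* push in reverse so entry 0 pops first */
		ep->entries[i].next_free = ep->free;
		ep->free = &ep->entries[i];
	}
	ep->nr_allocated = 0;
	return 0;
}

static struct entry *alloc_entry(struct entry_pool *ep)
{
	struct entry *e = ep->free;

	if (!e)
		return NULL;	/* pool exhausted, like the kernel version */
	ep->free = e->next_free;
	ep->nr_allocated++;
	return e;
}

/* The cblock is recovered by pointer arithmetic, never stored. */
static unsigned infer_cblock(struct entry_pool *ep, struct entry *e)
{
	return (unsigned) (e - ep->entries);
}

int main(void)
{
	struct entry_pool ep;
	struct entry *a, *b;

	if (epool_init(&ep, 16))
		return 1;
	a = alloc_entry(&ep);
	b = alloc_entry(&ep);
	printf("cblocks: %u %u\n", infer_cblock(&ep, a), infer_cblock(&ep, b));
	free(ep.entries);
	return 0;
}

Because free_entry() merely returns an entry to the free list and alloc_particular_entry() can claim a specific index, mq_load_mapping() can rebuild exactly the cblock layout recorded in the on-disk metadata.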
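The patch also turns the hard-coded promotion thresholds into per-policy tunables with defaults 1/4/8. The sketch below mirrors the decision made by adjusted_promote_threshold(), with promote_threshold (the moving average maintained by check_generation()) pinned at 0 for illustration: a read miss then promotes at 4 hits, a write miss at 8, and a write to a discarded block at a single hit when a free or clean cblock is available. Again, a simplified model rather than the kernel code.

#include <stdbool.h>
#include <stdio.h>

enum data_dir { DIR_READ, DIR_WRITE };

struct mq_tunables {
	unsigned promote_threshold;
	unsigned discard_promote_adjustment;
	unsigned read_promote_adjustment;
	unsigned write_promote_adjustment;
};

static unsigned adjusted_promote_threshold(const struct mq_tunables *mq,
					   bool discarded_oblock,
					   bool free_or_clean_cblock,
					   enum data_dir dir)
{
	if (dir == DIR_READ)
		return mq->promote_threshold + mq->read_promote_adjustment;

	/* A write to a discarded block needs no copy, so promote cheaply. */
	if (discarded_oblock && free_or_clean_cblock)
		return mq->discard_promote_adjustment;

	return mq->promote_threshold + mq->write_promote_adjustment;
}

int main(void)
{
	struct mq_tunables mq = { 0, 1, 4, 8 };	/* the new defaults */

	printf("read miss promotes at %u hits\n",
	       adjusted_promote_threshold(&mq, false, true, DIR_READ));
	printf("write miss promotes at %u hits\n",
	       adjusted_promote_threshold(&mq, false, true, DIR_WRITE));
	printf("write to discarded block promotes at %u hits\n",
	       adjusted_promote_threshold(&mq, true, true, DIR_WRITE));
	return 0;
}

The keys accepted by mq_set_config_value() (discard_promote_adjustment, read_promote_adjustment, write_promote_adjustment) can be changed at runtime; on a typical dm-cache setup that goes through the target's message interface, e.g. dmsetup message <cache-dev> 0 write_promote_adjustment 4, assuming the standard dm-cache message path.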
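Finally, splitting the cache queue into cache_clean and cache_dirty is what lets demote_cblock() return -ENOSPC instead of BUG_ON(): a victim is only ever taken from the clean queue, so a promotion never waits on writeback, while the new writeback_work hook drains the dirty queue and requeues entries as clean. A toy model of that interplay — not kernel code; plain arrays stand in for the multiqueue levels:

#include <stdbool.h>
#include <stdio.h>

#define POOL_SIZE 4

struct entry {
	unsigned oblock;
	bool dirty;
	bool in_cache;
};

static struct entry cache[POOL_SIZE];

/* Pick a victim: only clean resident entries qualify, as in demote_cblock(). */
static struct entry *demote_clean_entry(void)
{
	unsigned i;

	for (i = 0; i < POOL_SIZE; i++)
		if (cache[i].in_cache && !cache[i].dirty)
			return &cache[i];
	return NULL;	/* all dirty: caller refuses to promote this time */
}

/* Write back one dirty entry and mark it clean, like __mq_writeback_work(). */
static bool writeback_one(void)
{
	unsigned i;

	for (i = 0; i < POOL_SIZE; i++)
		if (cache[i].in_cache && cache[i].dirty) {
			cache[i].dirty = false;	/* data copied back to origin */
			return true;
		}
	return false;	/* the kernel version returns -ENODATA here */
}

int main(void)
{
	unsigned i;

	for (i = 0; i < POOL_SIZE; i++)
		cache[i] = (struct entry){ .oblock = i, .dirty = true, .in_cache = true };

	printf("victim before writeback: %p\n", (void *) demote_clean_entry());
	writeback_one();
	printf("victim after writeback: oblock %u\n", demote_clean_entry()->oblock);
	return 0;
}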