| author | Steve French <sfrench@us.ibm.com> | 2006-03-20 16:58:09 +0000 | 
|---|---|---|
| committer | Steve French <sfrench@us.ibm.com> | 2006-03-20 16:58:09 +0000 | 
| commit | fd4a0b92db6a57cba8d03efbe1cebf91f9124ce0 (patch) | |
| tree | 5886a08bfa1132058b06074f4666a36dc5ddd2a1 /mm/slab.c | |
| parent | 88274815f7477dc7550439413ab87c5ce4c5a623 (diff) | |
| parent | 7705a8792b0fc82fd7d4dd923724606bbfd9fb20 (diff) | |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'mm/slab.c')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | mm/slab.c | 96 |

1 file changed, 79 insertions, 17 deletions
```diff
diff --git a/mm/slab.c b/mm/slab.c
index f2e92dc1c9c..d0bd7f07ab0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
 	dump_stack();
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+	int node;
+
+	node = next_node(cpu_to_node(cpu), node_online_map);
+	if (node == MAX_NUMNODES)
+		node = 0;
+
+	__get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+	int node = __get_cpu_var(reap_node);
+
+	/*
+	 * Also drain per cpu pages on remote zones
+	 */
+	if (node != numa_node_id())
+		drain_node_pages(node);
+
+	node = next_node(node, node_online_map);
+	if (unlikely(node >= MAX_NUMNODES))
+		node = first_node(node_online_map);
+	__get_cpu_var(reap_node) = node;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
 /*
  * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
  * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 * at that time.
 	 */
 	if (keventd_up() && reap_work->func == NULL) {
+		init_reap_node(cpu);
 		INIT_WORK(reap_work, cache_reap, NULL);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
+/*
+ * Called from cache_reap() to regularly drain alien caches round robin.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+	int node = __get_cpu_var(reap_node);
+
+	if (l3->alien) {
+		struct array_cache *ac = l3->alien[node];
+		if (ac && ac->avail) {
+			spin_lock_irq(&ac->lock);
+			__drain_alien_cache(cachep, ac, node);
+			spin_unlock_irq(&ac->lock);
+		}
+	}
+}
+
 static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void)
 	struct cache_sizes *sizes;
 	struct cache_names *names;
 	int i;
+	int order;
 
 	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		kmem_list3_init(&initkmem_list3[i]);
@@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void)
 
 	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
 
-	cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0,
-		       &left_over, &cache_cache.num);
+	for (order = 0; order < MAX_ORDER; order++) {
+		cache_estimate(order, cache_cache.buffer_size,
+			cache_line_size(), 0, &left_over, &cache_cache.num);
+		if (cache_cache.num)
+			break;
+	}
 	if (!cache_cache.num)
 		BUG();
-
+	cache_cache.gfporder = order;
 	cache_cache.colour = left_over / cache_cache.colour_off;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
@@ -1648,6 +1713,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
 		left_over = remainder;
 
 		/*
+		 * A VFS-reclaimable slab tends to have most allocations
+		 * as GFP_NOFS and we really don't want to have to be allocating
+		 * higher-order pages when we are unable to shrink dcache.
+		 */
+		if (flags & SLAB_RECLAIM_ACCOUNT)
+			break;
+
+		/*
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
@@ -1869,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	size = ALIGN(size, align);
 
-	if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
-		/*
-		 * A VFS-reclaimable slab tends to have most allocations
-		 * as GFP_NOFS and we really don't want to have to be allocating
-		 * higher-order pages when we are unable to shrink dcache.
-		 */
-		cachep->gfporder = 0;
-		cache_estimate(cachep->gfporder, size, align, flags,
-			       &left_over, &cachep->num);
-	} else
-		left_over = calculate_slab_order(cachep, size, align, flags);
+	left_over = calculate_slab_order(cachep, size, align, flags);
 
 	if (!cachep->num) {
 		printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -3494,8 +3557,7 @@ static void cache_reap(void *unused)
 		check_irq_on();
 
 		l3 = searchp->nodelists[numa_node_id()];
-		if (l3->alien)
-			drain_alien_cache(searchp, l3->alien);
+		reap_alien(searchp, l3);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3545,7 +3607,7 @@ static void cache_reap(void *unused)
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
-	drain_remote_pages();
+	next_reap_node();
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
```
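The core of this merge for mm/slab.c is the per-CPU "reap node" rotor: each pass of cache_reap() drains the alien cache of just one remote node (and its remote per-cpu pages via drain_node_pages()), then advances to the next online node. As a rough illustration only, the standalone C program below sketches that rotor in userspace; NR_CPUS, NR_NODES, the online[] array, and the cpu_to_node()/next_online_node() helpers are made-up stand-ins for the kernel's node_online_map, next_node() and per-CPU variable machinery, not the actual kernel API.

```c
/*
 * Simplified userspace sketch (not kernel code) of the round-robin
 * "reap node" rotor introduced by this patch: each CPU remembers one
 * remote node and advances to the next online node on every reap pass,
 * so remote draining happens gradually instead of all at once.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS   4
#define NR_NODES  4

static bool online[NR_NODES] = { true, true, false, true }; /* node 2 offline */
static int reap_node[NR_CPUS];                              /* per-CPU rotor */

static int cpu_to_node(int cpu)
{
	return cpu % NR_NODES;          /* stand-in topology */
}

/* Next online node after 'node', wrapping around. */
static int next_online_node(int node)
{
	for (int i = 1; i <= NR_NODES; i++) {
		int n = (node + i) % NR_NODES;
		if (online[n])
			return n;
	}
	return node;                    /* only one node online */
}

static void init_reap_node(int cpu)
{
	/* Start each CPU's rotor on the node after its own, roughly as the patch does. */
	reap_node[cpu] = next_online_node(cpu_to_node(cpu));
}

static void next_reap_node(int cpu)
{
	int node = reap_node[cpu];

	/* Mirror the patch: only remote nodes get their pages drained. */
	if (node != cpu_to_node(cpu))
		printf("cpu%d: drain remote pages of node %d\n", cpu, node);

	reap_node[cpu] = next_online_node(node);
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		init_reap_node(cpu);

	/* Each timer tick (one cache_reap pass) advances the rotor by one node. */
	for (int tick = 0; tick < 3; tick++)
		for (int cpu = 0; cpu < NR_CPUS; cpu++)
			next_reap_node(cpu);

	return 0;
}
```

The design point is that instead of draining every remote node on every timer tick, each CPU touches only the node its rotor currently points at and then moves on, so the locking and page-draining cost is spread across successive cache_reap() invocations.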
