Diffstat (limited to 'mm/percpu.c')
 -rw-r--r--	mm/percpu.c	393
 1 file changed, 229 insertions, 164 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index efe816856a9..2ddf9a990db 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -67,6 +67,7 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/kmemleak.h>

 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -101,10 +102,11 @@ struct pcpu_chunk {
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	void			*base_addr;	/* base address of this chunk */
-	int			map_used;	/* # of map entries used */
+	int			map_used;	/* # of map entries used before the sentry */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
 	void			*data;		/* chunk data */
+	int			first_free;	/* no free below this */
 	bool			immutable;	/* no [de]population allowed */
 	unsigned long		populated[];	/* populated bitmap */
 };
@@ -116,9 +118,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;

-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;

 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -258,7 +260,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,

 /*
  * (Un)populated page region iterators.  Iterate over (un)populated
- * page regions betwen @start and @end in @chunk.  @rs and @re should
+ * page regions between @start and @end in @chunk.  @rs and @re should
  * be integer variables and will be set to start and end page index of
  * the current region.
  */
@@ -273,11 +275,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
 	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))

 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,19 +288,15 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;

 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
-	else {
-		void *ptr = vmalloc(size);
-		if (ptr)
-			memset(ptr, 0, size);
-		return ptr;
-	}
+	else
+		return vzalloc(size);
 }

 /**
@@ -306,7 +304,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
  *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -346,7 +344,7 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
  * @chunk: chunk of interest
  *
  * Determine whether area map of @chunk needs to be extended to
- * accomodate a new allocation.
+ * accommodate a new allocation.
  *
  * CONTEXT:
 * pcpu_lock.
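The struct pcpu_chunk changes above switch the allocation map from signed sizes to sorted byte offsets: bit 0 of an entry marks the area starting at that offset as in use, map_used counts the entries before a sentry holding the unit size, and first_free caches the lowest index worth scanning. A minimal userspace model of that encoding follows; the unit size and sample layout are made up for illustration and this is not the kernel code.

/* Model of the offset-based area map: map[map_used] is the sentry, so an
 * area's size is always the next entry's offset minus its own. */
#include <stdio.h>

#define UNIT_SIZE 4096		/* made-up unit size */

int main(void)
{
	/* [0, 256) free, [256, 512) in use, [512, 4096) free */
	int map[] = { 0, 256 | 1, 512, UNIT_SIZE | 1 };
	int map_used = 3;	/* entries before the sentry */

	for (int i = 0; i < map_used; i++) {
		int off  = map[i] & ~1;
		int size = (map[i + 1] & ~1) - off;

		printf("area at %4d, size %4d, %s\n",
		       off, size, (map[i] & 1) ? "in use" : "free");
	}
	return 0;
}

Because the size is implicit in the neighbouring offset, no separate length field has to be maintained when areas are split or merged.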
@@ -359,11 +357,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;

-	if (chunk->map_alloc >= chunk->map_used + 2)
+	if (chunk->map_alloc >= chunk->map_used + 3)
 		return 0;

 	new_alloc = PCPU_DFL_MAP_ALLOC;
-	while (new_alloc < chunk->map_used + 2)
+	while (new_alloc < chunk->map_used + 3)
 		new_alloc *= 2;

 	return new_alloc;
@@ -388,7 +386,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
 	unsigned long flags;

-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
 	if (!new)
 		return -ENOMEM;
@@ -421,48 +419,6 @@ out_unlock:
 }

 /**
- * pcpu_split_block - split a map block
- * @chunk: chunk of interest
- * @i: index of map block to split
- * @head: head size in bytes (can be 0)
- * @tail: tail size in bytes (can be 0)
- *
- * Split the @i'th map block into two or three blocks.  If @head is
- * non-zero, @head bytes block is inserted before block @i moving it
- * to @i+1 and reducing its size by @head bytes.
- *
- * If @tail is non-zero, the target block, which can be @i or @i+1
- * depending on @head, is reduced by @tail bytes and @tail byte block
- * is inserted after the target block.
- *
- * @chunk->map must have enough free slots to accomodate the split.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
-			     int head, int tail)
-{
-	int nr_extra = !!head + !!tail;
-
-	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);
-
-	/* insert new subblocks */
-	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
-		sizeof(chunk->map[0]) * (chunk->map_used - i));
-	chunk->map_used += nr_extra;
-
-	if (head) {
-		chunk->map[i + 1] = chunk->map[i] - head;
-		chunk->map[i++] = head;
-	}
-	if (tail) {
-		chunk->map[i++] -= tail;
-		chunk->map[i] = tail;
-	}
-}
-
-/**
  * pcpu_alloc_area - allocate area from a pcpu_chunk
  * @chunk: chunk of interest
  * @size: wanted size in bytes
@@ -486,19 +442,27 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 	int oslot = pcpu_chunk_slot(chunk);
 	int max_contig = 0;
 	int i, off;
+	bool seen_free = false;
+	int *p;

-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
-		bool is_last = i + 1 == chunk->map_used;
+	for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) {
 		int head, tail;
+		int this_size;
+
+		off = *p;
+		if (off & 1)
+			continue;

 		/* extra for alignment requirement */
 		head = ALIGN(off, align) - off;
-		BUG_ON(i == 0 && head != 0);

-		if (chunk->map[i] < 0)
-			continue;
-		if (chunk->map[i] < head + size) {
-			max_contig = max(chunk->map[i], max_contig);
+		this_size = (p[1] & ~1) - off;
+		if (this_size < head + size) {
+			if (!seen_free) {
+				chunk->first_free = i;
+				seen_free = true;
+			}
+			max_contig = max(this_size, max_contig);
 			continue;
 		}
@@ -508,44 +472,59 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 		 * than sizeof(int), which is very small but isn't too
 		 * uncommon for percpu allocations.
 		 */
-		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
-			if (chunk->map[i - 1] > 0)
-				chunk->map[i - 1] += head;
-			else {
-				chunk->map[i - 1] -= head;
+		if (head && (head < sizeof(int) || !(p[-1] & 1))) {
+			*p = off += head;
+			if (p[-1] & 1)
 				chunk->free_size -= head;
-			}
-			chunk->map[i] -= head;
-			off += head;
+			else
+				max_contig = max(*p - p[-1], max_contig);
+			this_size -= head;
 			head = 0;
 		}

 		/* if tail is small, just keep it around */
-		tail = chunk->map[i] - head - size;
-		if (tail < sizeof(int))
+		tail = this_size - head - size;
+		if (tail < sizeof(int)) {
 			tail = 0;
+			size = this_size - head;
+		}

 		/* split if warranted */
 		if (head || tail) {
-			pcpu_split_block(chunk, i, head, tail);
+			int nr_extra = !!head + !!tail;
+
+			/* insert new subblocks */
+			memmove(p + nr_extra + 1, p + 1,
+				sizeof(chunk->map[0]) * (chunk->map_used - i));
+			chunk->map_used += nr_extra;
+
 			if (head) {
-				i++;
-				off += head;
-				max_contig = max(chunk->map[i - 1], max_contig);
+				if (!seen_free) {
+					chunk->first_free = i;
+					seen_free = true;
+				}
+				*++p = off += head;
+				++i;
+				max_contig = max(head, max_contig);
+			}
+			if (tail) {
+				p[1] = off + size;
+				max_contig = max(tail, max_contig);
 			}
-			if (tail)
-				max_contig = max(chunk->map[i + 1], max_contig);
 		}

+		if (!seen_free)
+			chunk->first_free = i + 1;
+
 		/* update hint and mark allocated */
-		if (is_last)
+		if (i + 1 == chunk->map_used)
 			chunk->contig_hint = max_contig; /* fully scanned */
 		else
 			chunk->contig_hint = max(chunk->contig_hint,
 						 max_contig);

-		chunk->free_size -= chunk->map[i];
-		chunk->map[i] = -chunk->map[i];
+		chunk->free_size -= size;
+		*p |= 1;

 		pcpu_chunk_relocate(chunk, oslot);
 		return off;
@@ -573,34 +552,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 {
 	int oslot = pcpu_chunk_slot(chunk);
-	int i, off;
-
-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
-		if (off == freeme)
-			break;
+	int off = 0;
+	unsigned i, j;
+	int to_free = 0;
+	int *p;
+
+	freeme |= 1;	/* we are searching for <given offset, in use> pair */
+
+	i = 0;
+	j = chunk->map_used;
+	while (i != j) {
+		unsigned k = (i + j) / 2;
+		off = chunk->map[k];
+		if (off < freeme)
+			i = k + 1;
+		else if (off > freeme)
+			j = k;
+		else
+			i = j = k;
+	}
 	BUG_ON(off != freeme);

-	BUG_ON(chunk->map[i] > 0);
-	chunk->map[i] = -chunk->map[i];
-	chunk->free_size += chunk->map[i];
+	if (i < chunk->first_free)
+		chunk->first_free = i;
+	p = chunk->map + i;
+	*p = off &= ~1;
+	chunk->free_size += (p[1] & ~1) - off;
+
+	/* merge with next? */
+	if (!(p[1] & 1))
+		to_free++;

 	/* merge with previous? */
-	if (i > 0 && chunk->map[i - 1] >= 0) {
-		chunk->map[i - 1] += chunk->map[i];
-		chunk->map_used--;
-		memmove(&chunk->map[i], &chunk->map[i + 1],
-			(chunk->map_used - i) * sizeof(chunk->map[0]));
+	if (i > 0 && !(p[-1] & 1)) {
+		to_free++;
 		i--;
+		p--;
 	}
-	/* merge with next? */
-	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
-		chunk->map[i] += chunk->map[i + 1];
-		chunk->map_used--;
-		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
-			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
+	if (to_free) {
+		chunk->map_used -= to_free;
+		memmove(p + 1, p + 1 + to_free,
			(chunk->map_used - i) * sizeof(chunk->map[0]));
 	}

-	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
+	chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint);
 	pcpu_chunk_relocate(chunk, oslot);
 }
@@ -608,18 +603,21 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;

-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;

-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
 	if (!chunk->map) {
-		kfree(chunk);
+		pcpu_mem_free(chunk, pcpu_chunk_struct_size);
 		return NULL;
 	}

 	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
-	chunk->map[chunk->map_used++] = pcpu_unit_size;
+	chunk->map[0] = 0;
+	chunk->map[1] = pcpu_unit_size | 1;
+	chunk->map_used = 1;

 	INIT_LIST_HEAD(&chunk->list);
 	chunk->free_size = pcpu_unit_size;
@@ -633,7 +631,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
 	if (!chunk)
 		return;
 	pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
-	kfree(chunk);
+	pcpu_mem_free(chunk, pcpu_chunk_struct_size);
 }

 /*
@@ -713,6 +711,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	const char *err;
 	int slot, off, new_alloc;
 	unsigned long flags;
+	void __percpu *ptr;
+
+	/*
+	 * We want the lowest bit of offset available for in-use/free
+	 * indicator, so force >= 16bit alignment and make size even.
+	 */
+	if (unlikely(align < 2))
+		align = 2;
+
+	if (unlikely(size & 1))
+		size++;

 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
@@ -805,7 +814,9 @@ area_found:
 	mutex_unlock(&pcpu_alloc_mutex);

 	/* return address relative to base address */
-	return __addr_to_pcpu_ptr(chunk->base_addr + off);
+	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
+	kmemleak_alloc_percpu(ptr, size);
+	return ptr;

 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
@@ -919,6 +930,8 @@ void free_percpu(void __percpu *ptr)
 	if (!ptr)
 		return;

+	kmemleak_free_percpu(ptr);
+
 	addr = __pcpu_ptr_to_addr(ptr);

 	spin_lock_irqsave(&pcpu_lock, flags);
@@ -981,6 +994,17 @@ bool is_kernel_percpu_address(unsigned long addr)
  * address.  The caller is responsible for ensuring @addr stays valid
  * until this function finishes.
  *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
  * RETURNS:
 * The physical address for @addr.
 */
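With map entries kept sorted by offset and an allocated entry being exactly (offset | 1), the reworked pcpu_free_area() above can locate the area to free by bisection instead of a linear walk. A standalone sketch of that lookup, using a made-up map and not the kernel's own data structures:

#include <assert.h>
#include <stdio.h>

/* Return the index of the in-use area that starts at byte offset 'freeme'.
 * Mirrors the bisection in the hunk above: an in-use entry is (offset | 1),
 * so we can search for that exact value. */
static unsigned find_area(const int *map, unsigned map_used, int freeme)
{
	unsigned i = 0, j = map_used;

	freeme |= 1;			/* searching for <offset, in use> */
	while (i != j) {
		unsigned k = (i + j) / 2;
		int off = map[k];

		if (off < freeme)
			i = k + 1;
		else if (off > freeme)
			j = k;
		else
			i = j = k;
	}
	return i;
}

int main(void)
{
	/* hypothetical map: [0,128) in use, [128,512) free, [512,4096) in use */
	int map[] = { 0 | 1, 128, 512 | 1, 4096 | 1 };
	unsigned i = find_area(map, 3, 512);

	assert(map[i] == (512 | 1));
	printf("offset 512 is entry %u\n", i);	/* prints: entry 2 */
	return 0;
}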
@@ -988,19 +1012,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
 	unsigned int cpu;

 	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
 	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
-				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+				     pcpu_unit_pages);
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
 			void *start = per_cpu_ptr(base, cpu);
@@ -1012,13 +1036,14 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 	}

 	if (in_first_chunk) {
-		if ((unsigned long)addr < VMALLOC_START ||
-		    (unsigned long)addr >= VMALLOC_END)
+		if (!is_vmalloc_addr(addr))
 			return __pa(addr);
 		else
-			return page_to_phys(vmalloc_to_page(addr));
+			return page_to_phys(vmalloc_to_page(addr)) +
+			       offset_in_page(addr);
 	} else
-		return page_to_phys(pcpu_addr_to_page(addr));
+		return page_to_phys(pcpu_addr_to_page(addr)) +
+		       offset_in_page(addr);
 }
@@ -1048,7 +1073,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 			  __alignof__(ai->groups[0].cpu_map[0]));
 	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

-	ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
+	ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), 0);
 	if (!ptr)
 		return NULL;
 	ai = ptr;
@@ -1073,7 +1098,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-	free_bootmem(__pa(ai), ai->__ai_size);
+	memblock_free_early(__pa(ai), ai->__ai_size);
 }

 /**
@@ -1117,20 +1142,20 @@ static void pcpu_dump_alloc_info(const char *lvl,
 		for (alloc_end += gi->nr_units / upa;
 		     alloc < alloc_end; alloc++) {
 			if (!(alloc % apl)) {
-				printk("\n");
+				printk(KERN_CONT "\n");
 				printk("%spcpu-alloc: ", lvl);
 			}
-			printk("[%0*d] ", group_width, group);
+			printk(KERN_CONT "[%0*d] ", group_width, group);

 			for (unit_end += upa; unit < unit_end; unit++)
 				if (gi->cpu_map[unit] != NR_CPUS)
-					printk("%0*d ", cpu_width,
+					printk(KERN_CONT "%0*d ", cpu_width,
 					       gi->cpu_map[unit]);
 				else
-					printk("%s ", empty_str);
+					printk(KERN_CONT "%s ", empty_str);
 		}
 	}
-	printk("\n");
+	printk(KERN_CONT "\n");
 }
@@ -1220,8 +1245,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
 #ifdef CONFIG_SMP
 	PCPU_SETUP_BUG_ON(!ai->static_size);
+	PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK);
 #endif
 	PCPU_SETUP_BUG_ON(!base_addr);
+	PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
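The per_cpu_ptr_to_phys() hunks above also start adding offset_in_page(addr) to the result: translating a vmalloc address through its backing page only yields the physical address of the page frame, so the offset within the page must be added back. A small standalone illustration of that arithmetic; the addresses and the 4K PAGE_SIZE are made up, not taken from the kernel:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static uintptr_t offset_in_page(uintptr_t addr)
{
	return addr & ~PAGE_MASK;	/* low 12 bits for a 4K page */
}

int main(void)
{
	uintptr_t virt = 0xc0123456UL;		/* made-up vmalloc address */
	uintptr_t page_phys = 0x1a2b3000UL;	/* made-up physical page frame */

	/* the old code effectively returned page_phys and lost the low bits */
	printf("page-only: %#lx\n", (unsigned long)page_phys);
	printf("fixed:     %#lx\n",
	       (unsigned long)(page_phys + offset_in_page(virt)));
	return 0;
}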
@@ -1229,14 +1256,18 @@
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);

 	/* process group information and build config tables accordingly */
-	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
-	group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
-	unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
-	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
+	group_offsets = memblock_virt_alloc(ai->nr_groups *
+					     sizeof(group_offsets[0]), 0);
+	group_sizes = memblock_virt_alloc(ai->nr_groups *
+					   sizeof(group_sizes[0]), 0);
+	unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
+	unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);

 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;

 	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
 		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1256,9 +1287,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 			unit_map[cpu] = unit + i;
 			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
 		}
 	}
 	pcpu_nr_units = unit;
@@ -1268,7 +1303,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,

 	/* we're done parsing the input, undefine BUG macro and dump config */
 #undef PCPU_SETUP_BUG_ON
-	pcpu_dump_alloc_info(KERN_INFO, ai);
+	pcpu_dump_alloc_info(KERN_DEBUG, ai);

 	pcpu_nr_groups = ai->nr_groups;
 	pcpu_group_offsets = group_offsets;
@@ -1288,7 +1323,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 * empty chunks.
 	 */
 	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
-	pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
+	pcpu_slot = memblock_virt_alloc(
+			pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
@@ -1299,7 +1335,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 * covers static area + reserved area (mostly used for module
 	 * static percpu allocation).
 	 */
-	schunk = alloc_bootmem(pcpu_chunk_struct_size);
+	schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 	INIT_LIST_HEAD(&schunk->list);
 	schunk->base_addr = base_addr;
 	schunk->map = smap;
@@ -1317,13 +1353,17 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	}
 	schunk->contig_hint = schunk->free_size;

-	schunk->map[schunk->map_used++] = -ai->static_size;
+	schunk->map[0] = 1;
+	schunk->map[1] = ai->static_size;
+	schunk->map_used = 1;
 	if (schunk->free_size)
-		schunk->map[schunk->map_used++] = schunk->free_size;
+		schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size);
+	else
+		schunk->map[1] |= 1;

 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
-		dchunk = alloc_bootmem(pcpu_chunk_struct_size);
+		dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 		INIT_LIST_HEAD(&dchunk->list);
 		dchunk->base_addr = base_addr;
 		dchunk->map = dmap;
@@ -1332,8 +1372,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		bitmap_fill(dchunk->populated, pcpu_unit_pages);

 		dchunk->contig_hint = dchunk->free_size = dyn_size;
-		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
-		dchunk->map[dchunk->map_used++] = dchunk->free_size;
+		dchunk->map[0] = 1;
+		dchunk->map[1] = pcpu_reserved_chunk_limit;
+		dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
+		dchunk->map_used = 2;
 	}

 	/* link the first chunk in */
@@ -1347,7 +1389,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,

 #ifdef CONFIG_SMP

-const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
+const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
 	[PCPU_FC_AUTO]	= "auto",
 	[PCPU_FC_EMBED]	= "embed",
 	[PCPU_FC_PAGE]	= "page",
@@ -1357,6 +1399,9 @@ enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;

 static int __init percpu_alloc_setup(char *str)
 {
+	if (!str)
+		return -EINVAL;
+
 	if (0)
 		/* nada */;
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
@@ -1440,7 +1485,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
-	 * which can accomodate 4k aligned segments which are equal to
+	 * which can accommodate 4k aligned segments which are equal to
 	 * or larger than min_unit_size.
 	 */
 	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
@@ -1555,7 +1600,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
- * @free_fn: funtion to free percpu page
+ * @free_fn: function to free percpu page
  *
  * This is a helper to ease setting up embedded first percpu chunk and
 * can be called where pcpu_setup_first_chunk() is expected.
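The schunk/dchunk hunks above lay the boot-time first chunk out in the same offset encoding: entry 0 is the static area at offset 0 with the in-use bit set, entry 1 is where the free space begins, and the sentry carries the end of the populated range. A sketch of the resulting layout with made-up sizes; this models the initialization, it is not the kernel code:

#include <stdio.h>

int main(void)
{
	int static_size = 1920, free_size = 2176;	/* made-up sizes */
	int map[4], map_used;

	map[0] = 1;			/* offset 0, in use: static area */
	map[1] = static_size;		/* free space starts here */
	map_used = 1;
	if (free_size)
		map[++map_used] = (static_size + free_size) | 1;  /* sentry */
	else
		map[1] |= 1;		/* nothing free: mark the sentry in use */

	for (int i = 0; i < map_used; i++)
		printf("[%4d, %4d) %s\n", map[i] & ~1, map[i + 1] & ~1,
		       (map[i] & 1) ? "in use" : "free");
	return 0;
}

With the sample sizes this prints the static area [0, 1920) as in use and [1920, 4096) as free, which is exactly what the allocator later hands out from.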
@@ -1600,7 +1645,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

-	areas = alloc_bootmem_nopanic(areas_size);
+	areas = memblock_virt_alloc_nopanic(areas_size, 0);
 	if (!areas) {
 		rc = -ENOMEM;
 		goto out_free;
@@ -1622,9 +1667,21 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 			rc = -ENOMEM;
 			goto out_free_areas;
 		}
+		/* kmemleak tracks the percpu allocations separately */
+		kmemleak_free(ptr);
 		areas[group] = ptr;
 		base = min(ptr, base);
+	}
+
+	/*
+	 * Copy data and free unused parts.  This should happen after all
+	 * allocations are complete; otherwise, we may end up with
+	 * overlapping groups.
+	 */
+	for (group = 0; group < ai->nr_groups; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+		void *ptr = areas[group];

 		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
 			if (gi->cpu_map[i] == NR_CPUS) {
@@ -1648,10 +1705,10 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	max_distance += ai->unit_size;

 	/* warn if maximum distance is further than 75% of vmalloc space */
-	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
+	if (max_distance > VMALLOC_TOTAL * 3 / 4) {
 		pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
-			   "space 0x%lx\n",
-			   max_distance, VMALLOC_END - VMALLOC_START);
+			   "space 0x%lx\n", max_distance,
+			   VMALLOC_TOTAL);
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 		/* and fail if we have fallback */
 		rc = -EINVAL;
@@ -1668,12 +1725,13 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 out_free_areas:
 	for (group = 0; group < ai->nr_groups; group++)
-		free_fn(areas[group],
-			ai->groups[group].nr_units * ai->unit_size);
+		if (areas[group])
+			free_fn(areas[group],
+				ai->groups[group].nr_units * ai->unit_size);
 out_free:
 	pcpu_free_alloc_info(ai);
 	if (areas)
-		free_bootmem(__pa(areas), areas_size);
+		memblock_free_early(__pa(areas), areas_size);
 	return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -1683,7 +1741,7 @@ out_free:
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
  * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: funtion to free percpu page, always called with PAGE_SIZE
+ * @free_fn: function to free percpu page, always called with PAGE_SIZE
  * @populate_pte_fn: function to populate pte
  *
 * This is a helper to ease setting up page-remapped first percpu
@@ -1721,7 +1779,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	/* unaligned allocations can't be freed, round up to page size */
 	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
 			       sizeof(pages[0]));
-	pages = alloc_bootmem(pages_size);
+	pages = memblock_virt_alloc(pages_size, 0);

 	/* allocate pages */
 	j = 0;
@@ -1736,6 +1794,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 					   "for cpu%u\n", psize_str, cpu);
 				goto enomem;
 			}
+			/* kmemleak tracks the percpu allocations separately */
+			kmemleak_free(ptr);
 			pages[j++] = virt_to_page(ptr);
 		}
@@ -1782,7 +1842,7 @@ enomem:
 		free_fn(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
-	free_bootmem(__pa(pages), pages_size);
+	memblock_free_early(__pa(pages), pages_size);
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
@@ -1807,12 +1867,13 @@ EXPORT_SYMBOL(__per_cpu_offset);
 static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 				       size_t align)
 {
-	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
+	return  memblock_virt_alloc_from_nopanic(
+			size, align, __pa(MAX_DMA_ADDRESS));
 }

 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free_early(__pa(ptr), size);
 }

 void __init setup_per_cpu_areas(void)
@@ -1855,9 +1916,13 @@ void __init setup_per_cpu_areas(void)
 	void *fc;

 	ai = pcpu_alloc_alloc_info(1, 1);
-	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	fc = memblock_virt_alloc_from_nopanic(unit_size,
+					      PAGE_SIZE,
+					      __pa(MAX_DMA_ADDRESS));
 	if (!ai || !fc)
 		panic("Failed to allocate memory for percpu areas.");
+	/* kmemleak tracks the percpu allocations separately */
+	kmemleak_free(fc);

 	ai->dyn_size = unit_size;
 	ai->unit_size = unit_size;
@@ -1892,7 +1957,7 @@ void __init percpu_init_late(void)

 		BUILD_BUG_ON(size > PAGE_SIZE);

-		map = pcpu_mem_alloc(size);
+		map = pcpu_mem_zalloc(size);
 		BUG_ON(!map);

 		spin_lock_irqsave(&pcpu_lock, flags);
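Earlier in the diff, pcpu_alloc() starts forcing align >= 2 and an even size; that is what keeps bit 0 of every stored offset free for the in-use flag. A tiny standalone illustration of that rounding, with a made-up request size:

#include <stdio.h>
#include <stddef.h>

static void fixup(size_t *size, size_t *align)
{
	if (*align < 2)		/* keep offsets at least 2-byte aligned */
		*align = 2;
	if (*size & 1)		/* keep area boundaries even */
		(*size)++;
}

int main(void)
{
	size_t size = 7, align = 1;	/* e.g. a 7-byte percpu allocation */

	fixup(&size, &align);
	printf("size=%zu align=%zu\n", size, align);	/* size=8 align=2 */
	return 0;
}

Without this adjustment an odd-sized or byte-aligned allocation could place an area boundary at an odd offset, and storing that offset would corrupt the flag bit the new map encoding relies on.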
