Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c | 322
1 file changed, 137 insertions, 185 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 04729647f35..8f5330d74f4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -65,6 +65,8 @@
    kernel is not always grateful with that.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/mempolicy.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@@ -91,6 +93,7 @@
 #include <linux/ctype.h>
 #include <linux/mm_inline.h>
 #include <linux/mmu_notifier.h>
+#include <linux/printk.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -525,9 +528,14 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 #ifdef CONFIG_HUGETLB_PAGE
 	int nid;
 	struct page *page;
+	spinlock_t *ptl;
+	pte_t entry;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
-	page = pte_page(huge_ptep_get((pte_t *)pmd));
+	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
+	entry = huge_ptep_get((pte_t *)pmd);
+	if (!pte_present(entry))
+		goto unlock;
+	page = pte_page(entry);
 	nid = page_to_nid(page);
 	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
 		goto unlock;
@@ -536,7 +544,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 		isolate_huge_page(page, private);
 unlock:
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(ptl);
 #else
 	BUG();
 #endif
@@ -612,7 +620,7 @@ static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+#ifdef CONFIG_NUMA_BALANCING
 /*
  * This is used to mark a range of virtual addresses to be inaccessible.
  * These are later cleared by a NUMA hinting fault. Depending on these
@@ -626,7 +634,6 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long addr, unsigned long end)
 {
 	int nr_updated;
-	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
 	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
 	if (nr_updated)
@@ -640,7 +647,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 {
 	return 0;
 }
-#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#endif /* CONFIG_NUMA_BALANCING */
 
 /*
  * Walk through page tables and collect pages to be migrated.
@@ -649,19 +656,18 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
  * @nodes and @flags,) it's isolated and queued to the pagelist which is
  * passed via @private.)
  */
-static struct vm_area_struct *
+static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags, void *private)
 {
-	int err;
-	struct vm_area_struct *first, *vma, *prev;
-
+	int err = 0;
+	struct vm_area_struct *vma, *prev;
 
-	first = find_vma(mm, start);
-	if (!first)
-		return ERR_PTR(-EFAULT);
+	vma = find_vma(mm, start);
+	if (!vma)
+		return -EFAULT;
 	prev = NULL;
-	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
+	for (; vma && vma->vm_start < end; vma = vma->vm_next) {
 		unsigned long endvma = vma->vm_end;
 
 		if (endvma > end)
@@ -671,9 +677,9 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 
 		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 			if (!vma->vm_next && vma->vm_end < end)
-				return ERR_PTR(-EFAULT);
+				return -EFAULT;
 			if (prev && prev->vm_end < vma->vm_start)
-				return ERR_PTR(-EFAULT);
+				return -EFAULT;
 		}
 
 		if (flags & MPOL_MF_LAZY) {
@@ -687,15 +693,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 
 			err = queue_pages_pgd_range(vma, start, endvma, nodes,
 						flags, private);
-			if (err) {
-				first = ERR_PTR(err);
+			if (err)
 				break;
-			}
 		}
 next:
 		prev = vma;
 	}
-	return first;
+	return err;
 }
 
 /*
@@ -795,36 +799,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	return err;
 }
 
-/*
- * Update task->flags PF_MEMPOLICY bit: set iff non-default
- * mempolicy.  Allows more rapid checking of this (combined perhaps
- * with other PF_* flag bits) on memory allocation hot code paths.
- *
- * If called from outside this file, the task 'p' should -only- be
- * a newly forked child not yet visible on the task list, because
- * manipulating the task flags of a visible task is not safe.
- *
- * The above limitation is why this routine has the funny name
- * mpol_fix_fork_child_flag().
- *
- * It is also safe to call this with a task pointer of current,
- * which the static wrapper mpol_set_task_struct_flag() does,
- * for use within this file.
- */
-
-void mpol_fix_fork_child_flag(struct task_struct *p)
-{
-	if (p->mempolicy)
-		p->flags |= PF_MEMPOLICY;
-	else
-		p->flags &= ~PF_MEMPOLICY;
-}
-
-static void mpol_set_task_struct_flag(void)
-{
-	mpol_fix_fork_child_flag(current);
-}
-
 /* Set the process memory policy */
 static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 			     nodemask_t *nodes)
@@ -861,7 +835,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 	}
 	old = current->mempolicy;
 	current->mempolicy = new;
-	mpol_set_task_struct_flag();
 	if (new && new->mode == MPOL_INTERLEAVE &&
 	    nodes_weight(new->v.nodes))
 		current->il_next = first_node(new->v.nodes);
@@ -1059,7 +1032,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_node_page, dest,
+		err = migrate_pages(&pagelist, new_node_page, NULL, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1125,7 +1098,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 	tmp = *from;
 	while (!nodes_empty(tmp)) {
 		int s,d;
-		int source = -1;
+		int source = NUMA_NO_NODE;
 		int dest = 0;
 
 		for_each_node_mask(s, tmp) {
@@ -1160,7 +1133,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 			if (!node_isset(dest, tmp))
 				break;
 		}
-		if (source == -1)
+		if (source == NUMA_NO_NODE)
 			break;
 
 		node_clear(source, tmp);
@@ -1180,30 +1153,31 @@ out:
 
 /*
  * Allocate a new page for page migration based on vma policy.
- * Start assuming that page is mapped by vma pointed to by @private.
+ * Start by assuming the page is mapped by the same vma as contains @start.
  * Search forward from there, if not.  N.B., this assumes that the
 * list of pages handed to migrate_pages()--which is how we get here--
 * is in virtual address order.
 */
-static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
+static struct page *new_page(struct page *page, unsigned long start, int **x)
 {
-	struct vm_area_struct *vma = (struct vm_area_struct *)private;
+	struct vm_area_struct *vma;
 	unsigned long uninitialized_var(address);
 
+	vma = find_vma(current->mm, start);
 	while (vma) {
 		address = page_address_in_vma(page, vma);
 		if (address != -EFAULT)
 			break;
 		vma = vma->vm_next;
 	}
 
-	/*
-	 * queue_pages_range() confirms that @page belongs to some vma,
-	 * so vma shouldn't be NULL.
-	 */
-	BUG_ON(!vma);
-	if (PageHuge(page))
+	if (PageHuge(page)) {
+		BUG_ON(!vma);
 		return alloc_huge_page_noerr(vma, address, 1);
+	}
+	/*
+	 * if !vma, alloc_page_vma() will use task or system default policy
+	 */
 	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 #else
@@ -1219,7 +1193,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 	return -ENOSYS;
 }
 
-static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
+static struct page *new_page(struct page *page, unsigned long start, int **x)
 {
 	return NULL;
 }
@@ -1229,7 +1203,6 @@ static long do_mbind(unsigned long start, unsigned long len,
 		     unsigned short mode, unsigned short mode_flags,
 		     nodemask_t *nmask, unsigned long flags)
 {
-	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	struct mempolicy *new;
 	unsigned long end;
@@ -1295,11 +1268,9 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	vma = queue_pages_range(mm, start, end, nmask,
+	err = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
-
-	err = PTR_ERR(vma);	/* maybe ... */
-	if (!IS_ERR(vma))
+	if (!err)
 		err = mbind_range(mm, start, end, new);
 
 	if (!err) {
@@ -1307,9 +1278,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 
 		if (!list_empty(&pagelist)) {
 			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
-			nr_failed = migrate_pages(&pagelist, new_vma_page,
-					(unsigned long)vma,
-					MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
+			nr_failed = migrate_pages(&pagelist, new_page, NULL,
+				start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_movable_pages(&pagelist);
 		}
@@ -1317,7 +1287,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
-		putback_lru_pages(&pagelist);
+		putback_movable_pages(&pagelist);
 
 	up_write(&mm->mmap_sem);
  mpol_out:
@@ -1393,7 +1363,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 }
 
 SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
-		unsigned long, mode, unsigned long __user *, nmask,
+		unsigned long, mode, const unsigned long __user *, nmask,
 		unsigned long, maxnode, unsigned, flags)
 {
 	nodemask_t nodes;
@@ -1414,7 +1384,7 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
 }
 
 /* Set the process memory policy */
-SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
+SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
 		unsigned long, maxnode)
 {
 	int err;
@@ -1556,10 +1526,10 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 
 #ifdef CONFIG_COMPAT
 
-asmlinkage long compat_sys_get_mempolicy(int __user *policy,
-				     compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode,
-				     compat_ulong_t addr, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+		       compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode,
+		       compat_ulong_t, addr, compat_ulong_t, flags)
 {
 	long err;
 	unsigned long __user *nm = NULL;
@@ -1586,8 +1556,8 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 	return err;
 }
 
-asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode)
+COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
@@ -1609,9 +1579,9 @@ asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
-asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
-			     compat_ulong_t mode, compat_ulong_t __user *nmask,
-			     compat_ulong_t maxnode, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
+		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode, compat_ulong_t, flags)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
@@ -1637,9 +1607,9 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 
 /*
  * get_vma_policy(@task, @vma, @addr)
- * @task - task for fallback if vma policy == default
- * @vma   - virtual memory area whose policy is sought
- * @addr  - address in @vma for shared policy lookup
+ * @task: task for fallback if vma policy == default
+ * @vma: virtual memory area whose policy is sought
+ * @addr: address in @vma for shared policy lookup
 *
 * Returns effective policy for a VMA at specified address.
 * Falls back to @task or system default policy, as necessary.
@@ -1679,6 +1649,30 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
 	return pol;
 }
 
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma)
+{
+	struct mempolicy *pol = get_task_policy(task);
+	if (vma) {
+		if (vma->vm_ops && vma->vm_ops->get_policy) {
+			bool ret = false;
+
+			pol = vma->vm_ops->get_policy(vma, vma->vm_start);
+			if (pol && (pol->flags & MPOL_F_MOF))
+				ret = true;
+			mpol_cond_put(pol);
+
+			return ret;
+		} else if (vma->vm_policy) {
+			pol = vma->vm_policy;
+		}
+	}
+
+	if (!pol)
+		return default_policy.flags & MPOL_F_MOF;
+
+	return pol->flags & MPOL_F_MOF;
+}
+
 static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
 {
 	enum zone_type dynamic_policy_zone = policy_zone;
@@ -1758,21 +1752,18 @@ static unsigned interleave_nodes(struct mempolicy *policy)
 /*
 * Depending on the memory policy provide a node from which to allocate the
 * next slab entry.
- * @policy must be protected by freeing by the caller.  If @policy is
- * the current task's mempolicy, this protection is implicit, as only the
- * task can change it's policy.  The system default policy requires no
- * such protection.
 */
-unsigned slab_node(void)
+unsigned int mempolicy_slab_node(void)
 {
 	struct mempolicy *policy;
+	int node = numa_mem_id();
 
 	if (in_interrupt())
-		return numa_node_id();
+		return node;
 
 	policy = current->mempolicy;
 	if (!policy || policy->flags & MPOL_F_LOCAL)
-		return numa_node_id();
+		return node;
 
 	switch (policy->mode) {
 	case MPOL_PREFERRED:
@@ -1792,11 +1783,11 @@ unsigned slab_node(void)
 		struct zonelist *zonelist;
 		struct zone *zone;
 		enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
-		zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0];
+		zonelist = &NODE_DATA(node)->node_zonelists[0];
 		(void)first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes,
 							&zone);
-		return zone ? zone->node : numa_node_id();
+		return zone ? zone->node : node;
 	}
 
 	default:
@@ -1811,7 +1802,7 @@ static unsigned offset_il_node(struct mempolicy *pol,
 	unsigned nnodes = nodes_weight(pol->v.nodes);
 	unsigned target;
 	int c;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	if (!nnodes)
 		return numa_node_id();
@@ -1848,11 +1839,11 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 
 /*
 * Return the bit number of a random bit set in the nodemask.
- * (returns -1 if nodemask is empty)
+ * (returns NUMA_NO_NODE if nodemask is empty)
 */
 int node_random(const nodemask_t *maskp)
 {
-	int w, bit = -1;
+	int w, bit = NUMA_NO_NODE;
 
 	w = nodes_weight(*maskp);
 	if (w)
@@ -1864,18 +1855,18 @@ int node_random(const nodemask_t *maskp)
 #ifdef CONFIG_HUGETLBFS
 /*
 * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
- * @vma = virtual memory area whose policy is sought
- * @addr = address in @vma for shared policy lookup and interleave policy
- * @gfp_flags = for requested zone
- * @mpol = pointer to mempolicy pointer for reference counted mempolicy
- * @nodemask = pointer to nodemask pointer for MPOL_BIND nodemask
+ * @vma: virtual memory area whose policy is sought
+ * @addr: address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags: for requested zone
+ * @mpol: pointer to mempolicy pointer for reference counted mempolicy
+ * @nodemask: pointer to nodemask pointer for MPOL_BIND nodemask
 *
 * Returns a zonelist suitable for a huge page allocation and a pointer
 * to the struct mempolicy for conditional unref after allocation.
 * If the effective policy is 'BIND, returns a pointer to the mempolicy's
 * @nodemask for filtering the zonelist.
 *
- * Must be protected by get_mems_allowed()
+ * Must be protected by read_mems_allowed_begin()
 */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 				gfp_t gfp_flags, struct mempolicy **mpol,
@@ -2039,7 +2030,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 
 retry_cpuset:
 	pol = get_vma_policy(current, vma, addr);
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
@@ -2047,7 +2038,7 @@ retry_cpuset:
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 			goto retry_cpuset;
 
 		return page;
@@ -2057,7 +2048,7 @@ retry_cpuset:
 				      policy_nodemask(gfp, pol));
 	if (unlikely(mpol_needs_cond_ref(pol)))
 		__mpol_put(pol);
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return page;
 }
@@ -2091,7 +2082,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 		pol = &default_policy;
 
 retry_cpuset:
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	/*
 	 * No reference counting needed for current->mempolicy
@@ -2104,7 +2095,7 @@ retry_cpuset:
 				policy_zonelist(gfp, pol, numa_node_id()),
 				policy_nodemask(gfp, pol));
 
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 
 	return page;
@@ -2148,7 +2139,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 	} else
 		*new = *old;
 
-	rcu_read_lock();
 	if (current_cpuset_is_being_rebound()) {
 		nodemask_t mems = cpuset_mems_allowed(current);
 		if (new->flags & MPOL_F_REBINDING)
@@ -2156,7 +2146,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 		else
 			mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE);
 	}
-	rcu_read_unlock();
 	atomic_set(&new->refcnt, 1);
 	return new;
 }
@@ -2280,9 +2269,9 @@ static void sp_free(struct sp_node *n)
 /**
 * mpol_misplaced - check whether current page node is valid in policy
 *
- * @page   - page to be checked
- * @vma    - vm area where page mapped
- * @addr   - virtual address where page mapped
+ * @page: page to be checked
+ * @vma: vm area where page mapped
+ * @addr: virtual address where page mapped
 *
 * Lookup current policy node id for vma,addr and "compare to" page's
 * node id.
@@ -2300,6 +2289,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	struct zone *zone;
 	int curnid = page_to_nid(page);
 	unsigned long pgoff;
+	int thiscpu = raw_smp_processor_id();
+	int thisnid = cpu_to_node(thiscpu);
 	int polnid = -1;
 	int ret = -1;
 
@@ -2348,33 +2339,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	/* Migrate the page towards the node whose CPU is referencing it */
 	if (pol->flags & MPOL_F_MORON) {
-		int last_nid;
-
-		polnid = numa_node_id();
+		polnid = thisnid;
 
-		/*
-		 * Multi-stage node selection is used in conjunction
-		 * with a periodic migration fault to build a temporal
-		 * task<->page relation. By using a two-stage filter we
-		 * remove short/unlikely relations.
-		 *
-		 * Using P(p) ~ n_p / n_t as per frequentist
-		 * probability, we can equate a task's usage of a
-		 * particular page (n_p) per total usage of this
-		 * page (n_t) (in a given time-span) to a probability.
-		 *
-		 * Our periodic faults will sample this probability and
-		 * getting the same result twice in a row, given these
-		 * samples are fully independent, is then given by
-		 * P(n)^2, provided our sample period is sufficiently
-		 * short compared to the usage pattern.
-		 *
-		 * This quadric squishes small probabilities, making
-		 * it less likely we act on an unlikely task<->page
-		 * relation.
-		 */
-		last_nid = page_nid_xchg_last(page, polnid);
-		if (last_nid != polnid)
+		if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
 			goto out;
 	}
 
@@ -2580,7 +2547,7 @@ void mpol_free_shared_policy(struct shared_policy *p)
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-static bool __initdata numabalancing_override;
+static int __initdata numabalancing_override;
 
 static void __init check_numabalancing_enable(void)
 {
@@ -2589,9 +2556,15 @@ static void __init check_numabalancing_enable(void)
 	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
 		numabalancing_default = true;
 
+	/* Parsed by setup_numabalancing. override == 1 enables, -1 disables */
+	if (numabalancing_override)
+		set_numabalancing_state(numabalancing_override == 1);
+
 	if (nr_node_ids > 1 && !numabalancing_override) {
-		printk(KERN_INFO "Enabling automatic NUMA balancing. "
-			"Configure with numa_balancing= or sysctl");
+		pr_info("%s automatic NUMA balancing. "
+			"Configure with numa_balancing= or the "
+			"kernel.numa_balancing sysctl",
+			numabalancing_default ? "Enabling" : "Disabling");
 		set_numabalancing_state(numabalancing_default);
 	}
 }
@@ -2601,18 +2574,17 @@ static int __init setup_numabalancing(char *str)
 	int ret = 0;
 	if (!str)
 		goto out;
-	numabalancing_override = true;
 
 	if (!strcmp(str, "enable")) {
-		set_numabalancing_state(true);
+		numabalancing_override = 1;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_numabalancing_state(false);
+		numabalancing_override = -1;
 		ret = 1;
 	}
out:
 	if (!ret)
-		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
+		pr_warn("Unable to parse numa_balancing=\n");
 
 	return ret;
 }
@@ -2672,7 +2644,7 @@ void __init numa_policy_init(void)
 		node_set(prefer, interleave_nodes);
 
 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
-		printk("numa_policy_init: interleaving failed\n");
+		pr_err("%s: interleaving failed\n", __func__);
 
 	check_numabalancing_enable();
 }
@@ -2840,62 +2812,45 @@ out:
 * @maxlen:  length of @buffer
 * @pol:  pointer to mempolicy to be formatted
 *
- * Convert a mempolicy into a string.
- * Returns the number of characters in buffer (if positive)
- * or an error (negative)
+ * Convert @pol into a string.  If @buffer is too short, truncate the string.
+ * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
+ * longest flag, "relative", and to display at least a few node ids.
 */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
-	int l;
-	nodemask_t nodes;
-	unsigned short mode;
-	unsigned short flags = pol ? pol->flags : 0;
+	nodemask_t nodes = NODE_MASK_NONE;
+	unsigned short mode = MPOL_DEFAULT;
+	unsigned short flags = 0;
 
-	/*
-	 * Sanity check:  room for longest mode, flag and some nodes
-	 */
-	VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);
-
-	if (!pol || pol == &default_policy)
-		mode = MPOL_DEFAULT;
-	else
+	if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
 		mode = pol->mode;
+		flags = pol->flags;
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		nodes_clear(nodes);
 		break;
-
 	case MPOL_PREFERRED:
-		nodes_clear(nodes);
 		if (flags & MPOL_F_LOCAL)
 			mode = MPOL_LOCAL;
 		else
 			node_set(pol->v.preferred_node, nodes);
 		break;
-
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		nodes = pol->v.nodes;
 		break;
-
 	default:
-		return -EINVAL;
+		WARN_ON_ONCE(1);
+		snprintf(p, maxlen, "unknown");
+		return;
 	}
 
-	l = strlen(policy_modes[mode]);
-	if (buffer + maxlen < p + l + 1)
-		return -ENOSPC;
-
-	strcpy(p, policy_modes[mode]);
-	p += l;
+	p += snprintf(p, maxlen, "%s", policy_modes[mode]);
 
 	if (flags & MPOL_MODE_FLAGS) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = '=';
+		p += snprintf(p, buffer + maxlen - p, "=");
 
 		/*
		 * Currently, the only defined flags are mutually exclusive
@@ -2907,10 +2862,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	}
 
 	if (!nodes_empty(nodes)) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = ':';
+		p += snprintf(p, buffer + maxlen - p, ":");
 	 	p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
 	}
-	return p - buffer;
 }
