diff options
Diffstat (limited to 'arch/x86/mm/numa.c')
| -rw-r--r-- | arch/x86/mm/numa.c | 156 |
1 files changed, 102 insertions, 54 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 19d3fa08b11..a32b706c401 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -56,11 +56,11 @@ early_param("numa", numa_setup); /* * apicid, cpu, node mappings */ -s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { +s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -int __cpuinit numa_cpu_node(int cpu) +int numa_cpu_node(int cpu) { int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); @@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map); DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -void __cpuinit numa_set_node(int cpu, int node) +void numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); @@ -97,11 +97,10 @@ void __cpuinit numa_set_node(int cpu, int node) #endif per_cpu(x86_cpu_to_node_map, cpu) = node; - if (node != NUMA_NO_NODE) - set_cpu_numa_node(cpu, node); + set_cpu_numa_node(cpu, node); } -void __cpuinit numa_clear_node(int cpu) +void numa_clear_node(int cpu) { numa_set_node(cpu, NUMA_NO_NODE); } @@ -115,14 +114,11 @@ void __cpuinit numa_clear_node(int cpu) */ void __init setup_node_to_cpumask_map(void) { - unsigned int node, num = 0; + unsigned int node; /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) @@ -141,8 +137,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end, /* whine about and ignore invalid blks */ if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", - nid, start, end); + pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); return 0; } @@ -193,7 +189,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) static void __init setup_node_data(int nid, u64 start, u64 end) { const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); - bool remapped = false; u64 nd_pa; void *nd; int tnid; @@ -205,37 +200,32 @@ static void __init setup_node_data(int nid, u64 start, u64 end) if (end && (end - start) < NODE_MIN_SIZE) return; - /* initialize remap allocator before aligning to ZONE_ALIGN */ - init_alloc_remap(nid, start, end); - start = roundup(start, ZONE_ALIGN); - printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", - nid, start, end); + printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); /* - * Allocate node data. Try remap allocator first, node-local - * memory and then any node. Never allocate in DMA zone. + * Allocate node data. Try node-local memory and then any node. + * Never allocate in DMA zone. */ - nd = alloc_remap(nid, nd_size); - if (nd) { - nd_pa = __pa(nd); - remapped = true; - } else { - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + if (!nd_pa) { + nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES, + MEMBLOCK_ALLOC_ACCESSIBLE); if (!nd_pa) { pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid); return; } - nd = __va(nd_pa); } + nd = __va(nd_pa); /* report and initialize */ - printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", - nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); + printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); - if (!remapped && tnid != nid) + if (tnid != nid) printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; @@ -291,14 +281,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) */ if (bi->end > bj->start && bi->start < bj->end) { if (bi->nid != bj->nid) { - pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->nid, bj->start, bj->end); + pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, + bj->nid, bj->start, bj->end - 1); return -EINVAL; } - pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->start, bj->end); + pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, + bj->start, bj->end - 1); } /* @@ -320,9 +310,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) } if (k < mi->nr_blks) continue; - printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", - bi->nid, bi->start, bi->end, bj->start, bj->end, - start, end); + printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, bj->start, + bj->end - 1, start, end - 1); bi->start = start; bi->end = end; numa_remove_memblk_from(j--, mi); @@ -501,7 +491,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *mb = &mi->blk[i]; - memblock_set_node(mb->start, mb->end - mb->start, mb->nid); + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.memory, mb->nid); } /* @@ -563,6 +554,41 @@ static void __init numa_init_array(void) } } +static void __init numa_clear_kernel_node_hotplug(void) +{ + int i, nid; + nodemask_t numa_kernel_nodes = NODE_MASK_NONE; + unsigned long start, end; + struct memblock_region *r; + + /* + * At this time, all memory regions reserved by memblock are + * used by the kernel. Set the nid in memblock.reserved will + * mark out all the nodes the kernel resides in. + */ + for (i = 0; i < numa_meminfo.nr_blks; i++) { + struct numa_memblk *mb = &numa_meminfo.blk[i]; + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.reserved, mb->nid); + } + + /* Mark all kernel nodes. */ + for_each_memblock(reserved, r) + node_set(r->nid, numa_kernel_nodes); + + /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ + for (i = 0; i < numa_meminfo.nr_blks; i++) { + nid = numa_meminfo.blk[i].nid; + if (!node_isset(nid, numa_kernel_nodes)) + continue; + + start = numa_meminfo.blk[i].start; + end = numa_meminfo.blk[i].end; + + memblock_clear_hotplug(start, end - start); + } +} + static int __init numa_init(int (*init_func)(void)) { int i; @@ -575,12 +601,28 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); + WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, + MAX_NUMNODES)); + WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, + MAX_NUMNODES)); + /* In case that parsing SRAT failed. */ + WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); numa_reset_distance(); ret = init_func(); if (ret < 0) return ret; + + /* + * We reset memblock back to the top-down direction + * here because if we configured ACPI_NUMA, we have + * parsed SRAT in init_func(). It is ok to have the + * reset here even if we did't configure ACPI_NUMA + * or acpi numa init fails and fallbacks to dummy + * numa init. + */ + memblock_set_bottom_up(false); + ret = numa_cleanup_meminfo(&numa_meminfo); if (ret < 0) return ret; @@ -600,6 +642,16 @@ static int __init numa_init(int (*init_func)(void)) numa_clear_node(i); } numa_init_array(); + + /* + * At very early time, the kernel have to use some memory such as + * loading the kernel image. We cannot prevent this anyway. So any + * node the kernel resides in should be un-hotpluggable. + * + * And when we come here, numa_init() won't fail. + */ + numa_clear_kernel_node_hotplug(); + return 0; } @@ -616,8 +668,8 @@ static int __init dummy_numa_init(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); - printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", - 0LLU, PFN_PHYS(max_pfn)); + printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); @@ -635,10 +687,6 @@ static int __init dummy_numa_init(void) void __init x86_numa_init(void) { if (!numa_off) { -#ifdef CONFIG_X86_NUMAQ - if (!numa_init(numaq_numa_init)) - return; -#endif #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; @@ -705,12 +753,12 @@ void __init init_cpu_to_node(void) #ifndef CONFIG_DEBUG_PER_CPU_MAPS # ifndef CONFIG_NUMA_EMU -void __cpuinit numa_add_cpu(int cpu) +void numa_add_cpu(int cpu) { cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } -void __cpuinit numa_remove_cpu(int cpu) +void numa_remove_cpu(int cpu) { cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } @@ -777,17 +825,17 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable) } # ifndef CONFIG_NUMA_EMU -static void __cpuinit numa_set_cpumask(int cpu, bool enable) +static void numa_set_cpumask(int cpu, bool enable) { debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); } -void __cpuinit numa_add_cpu(int cpu) +void numa_add_cpu(int cpu) { numa_set_cpumask(cpu, true); } -void __cpuinit numa_remove_cpu(int cpu) +void numa_remove_cpu(int cpu) { numa_set_cpumask(cpu, false); } |
