aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/mm/numa.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/numa.c')
-rw-r--r--arch/x86/mm/numa.c126
1 files changed, 87 insertions, 39 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2d125be1bae..a32b706c401 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,11 +56,11 @@ early_param("numa", numa_setup);
/*
* apicid, cpu, node mappings
*/
-s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 __apicid_to_node[MAX_LOCAL_APIC] = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
-int __cpuinit numa_cpu_node(int cpu)
+int numa_cpu_node(int cpu)
{
int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -97,11 +97,10 @@ void __cpuinit numa_set_node(int cpu, int node)
#endif
per_cpu(x86_cpu_to_node_map, cpu) = node;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
+ set_cpu_numa_node(cpu, node);
}
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
@@ -115,14 +114,11 @@ void __cpuinit numa_clear_node(int cpu)
*/
void __init setup_node_to_cpumask_map(void)
{
- unsigned int node, num = 0;
+ unsigned int node;
/* setup nr_node_ids if not done yet */
- if (nr_node_ids == MAX_NUMNODES) {
- for_each_node_mask(node, node_possible_map)
- num = node;
- nr_node_ids = num + 1;
- }
+ if (nr_node_ids == MAX_NUMNODES)
+ setup_nr_node_ids();
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
@@ -193,7 +189,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
static void __init setup_node_data(int nid, u64 start, u64 end)
{
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- bool remapped = false;
u64 nd_pa;
void *nd;
int tnid;
@@ -205,37 +200,32 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
if (end && (end - start) < NODE_MIN_SIZE)
return;
- /* initialize remap allocator before aligning to ZONE_ALIGN */
- init_alloc_remap(nid, start, end);
-
start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
nid, start, end - 1);
/*
- * Allocate node data. Try remap allocator first, node-local
- * memory and then any node. Never allocate in DMA zone.
+ * Allocate node data. Try node-local memory and then any node.
+ * Never allocate in DMA zone.
*/
- nd = alloc_remap(nid, nd_size);
- if (nd) {
- nd_pa = __pa(nd);
- remapped = true;
- } else {
- nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+ if (!nd_pa) {
+ nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
+ MEMBLOCK_ALLOC_ACCESSIBLE);
if (!nd_pa) {
pr_err("Cannot find %zu bytes in node %d\n",
nd_size, nid);
return;
}
- nd = __va(nd_pa);
}
+ nd = __va(nd_pa);
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
- nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+ printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (!remapped && tnid != nid)
+ if (tnid != nid)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
node_data[nid] = nd;
@@ -501,7 +491,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *mb = &mi->blk[i];
- memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
+ memblock_set_node(mb->start, mb->end - mb->start,
+ &memblock.memory, mb->nid);
}
/*
@@ -563,6 +554,41 @@ static void __init numa_init_array(void)
}
}
+static void __init numa_clear_kernel_node_hotplug(void)
+{
+ int i, nid;
+ nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
+ unsigned long start, end;
+ struct memblock_region *r;
+
+ /*
+ * At this time, all memory regions reserved by memblock are
+ * used by the kernel. Set the nid in memblock.reserved will
+ * mark out all the nodes the kernel resides in.
+ */
+ for (i = 0; i < numa_meminfo.nr_blks; i++) {
+ struct numa_memblk *mb = &numa_meminfo.blk[i];
+ memblock_set_node(mb->start, mb->end - mb->start,
+ &memblock.reserved, mb->nid);
+ }
+
+ /* Mark all kernel nodes. */
+ for_each_memblock(reserved, r)
+ node_set(r->nid, numa_kernel_nodes);
+
+ /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+ for (i = 0; i < numa_meminfo.nr_blks; i++) {
+ nid = numa_meminfo.blk[i].nid;
+ if (!node_isset(nid, numa_kernel_nodes))
+ continue;
+
+ start = numa_meminfo.blk[i].start;
+ end = numa_meminfo.blk[i].end;
+
+ memblock_clear_hotplug(start, end - start);
+ }
+}
+
static int __init numa_init(int (*init_func)(void))
{
int i;
@@ -575,12 +601,28 @@ static int __init numa_init(int (*init_func)(void))
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
+ WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
+ MAX_NUMNODES));
+ WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
+ MAX_NUMNODES));
+ /* In case that parsing SRAT failed. */
+ WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
numa_reset_distance();
ret = init_func();
if (ret < 0)
return ret;
+
+ /*
+ * We reset memblock back to the top-down direction
+ * here because if we configured ACPI_NUMA, we have
+ * parsed SRAT in init_func(). It is ok to have the
+ * reset here even if we did't configure ACPI_NUMA
+ * or acpi numa init fails and fallbacks to dummy
+ * numa init.
+ */
+ memblock_set_bottom_up(false);
+
ret = numa_cleanup_meminfo(&numa_meminfo);
if (ret < 0)
return ret;
@@ -600,6 +642,16 @@ static int __init numa_init(int (*init_func)(void))
numa_clear_node(i);
}
numa_init_array();
+
+ /*
+ * At very early time, the kernel have to use some memory such as
+ * loading the kernel image. We cannot prevent this anyway. So any
+ * node the kernel resides in should be un-hotpluggable.
+ *
+ * And when we come here, numa_init() won't fail.
+ */
+ numa_clear_kernel_node_hotplug();
+
return 0;
}
@@ -635,10 +687,6 @@ static int __init dummy_numa_init(void)
void __init x86_numa_init(void)
{
if (!numa_off) {
-#ifdef CONFIG_X86_NUMAQ
- if (!numa_init(numaq_numa_init))
- return;
-#endif
#ifdef CONFIG_ACPI_NUMA
if (!numa_init(x86_acpi_numa_init))
return;
@@ -705,12 +753,12 @@ void __init init_cpu_to_node(void)
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
# ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
@@ -777,17 +825,17 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable)
}
# ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, bool enable)
+static void numa_set_cpumask(int cpu, bool enable)
{
debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
}
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
{
numa_set_cpumask(cpu, true);
}
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
{
numa_set_cpumask(cpu, false);
}