From 4e76f4e67a106ed827ca721b4c8b622047cd2f6d Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:47 -0800 Subject: x86, numa: Avoid compiling NUMA emulation functions without CONFIG_NUMA_EMU Both acpi_get_nodes() and amd_get_nodes() are only necessary when CONFIG_NUMA_EMU is enabled, so avoid compiling them when the option is disabled. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/mm/srat_64.c') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b06..8241bf0f6eb 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -339,6 +339,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} +#ifdef CONFIG_NUMA_EMU int __init acpi_get_nodes(struct bootnode *physnodes) { int i; @@ -351,6 +352,7 @@ int __init acpi_get_nodes(struct bootnode *physnodes) } return ret; } +#endif /* CONFIG_NUMA_EMU */ /* Use the information discovered above to actually set up the nodes. */ int __init acpi_scan_nodes(unsigned long start, unsigned long end) -- cgit v1.2.3-18-g5258 From f51bf3073a145a5b3263fd882c52d6ec04b687da Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:51 -0800 Subject: x86, numa: Fake apicid and pxm mappings for NUMA emulation This patch adds the equivalent of acpi_fake_nodes() for AMD Northbridge platforms. The goal is to fake the apicid-to-node mappings for NUMA emulation so the physical topology of the machine is correctly maintained within the kernel. This change also fakes proximity domains for both ACPI and k8 code so the physical distance between emulated nodes is maintained via node_distance(). This exports the correct distances via /sys/devices/system/node/.../distance based on the underlying topology. A new helper function, fake_physnodes(), is introduced to correctly invoke the correct NUMA code to fake these two mappings based on the system type. If there is no underlying NUMA configuration, all cpus are mapped to node 0 for local distance. Since acpi_fake_nodes() is no longer called with CONFIG_ACPI_NUMA, it's prototype can be removed from the header file for such a configuration. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/mm/srat_64.c') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 8241bf0f6eb..c48b443706c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -497,8 +497,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) { int i, j; - printk(KERN_INFO "Faking PXM affinity for fake nodes on real " - "topology.\n"); for (i = 0; i < num_nodes; i++) { int nid, pxm; -- cgit v1.2.3-18-g5258 From a387e95a49743cf9835c5299ca549232618d8249 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:56 -0800 Subject: x86, numa: Fix cpu to node mapping for sparse node ids NUMA boot code assumes that physical node ids start at 0, but the DIMMs that the apic id represents may not be reachable. If this is the case, node 0 is never online and cpus never end up getting appropriately assigned to a node. This causes the cpumask of all online nodes to be empty and machines crash with kernel code assuming online nodes have valid cpus. The fix is to appropriately map all the address ranges for physical nodes and ensure the cpu to node mapping function checks all possible nodes (up to MAX_NUMNODES) instead of simply checking nodes 0-N, where N is the number of physical nodes, for valid address ranges. This requires no longer "compressing" the address ranges of nodes in the physical node map from 0-N, but rather leave indices in physnodes[] to represent the actual node id of the physical node. Accordingly, the topology exported by both amd_get_nodes() and acpi_get_nodes() no longer must return the number of nodes to iterate through; all such iterations will now be to MAX_NUMNODES. This change also passes the end address of system RAM (which may be different from normal operation if mem= is specified on the command line) before the physnodes[] array is populated. ACPI parsed nodes are truncated to fit within the address range that respect the mem= boundaries and even some physical nodes may become unreachable in such cases. When NUMA emulation does succeed, any apicid to node mapping that exists for unreachable nodes are given default values so that proximity domains can still be assigned. This is important for node_distance() to function as desired. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm/srat_64.c') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index c48b443706c..a756bcf3fa4 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -340,17 +340,16 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} #ifdef CONFIG_NUMA_EMU -int __init acpi_get_nodes(struct bootnode *physnodes) +void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, + unsigned long end) { int i; - int ret = 0; for_each_node_mask(i, nodes_parsed) { - physnodes[ret].start = nodes[i].start; - physnodes[ret].end = nodes[i].end; - ret++; + cutoff_node(i, start, end); + physnodes[i].start = nodes[i].start; + physnodes[i].end = nodes[i].end; } - return ret; } #endif /* CONFIG_NUMA_EMU */ @@ -516,6 +515,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) fake_apicid_to_node[j] == NUMA_NO_NODE) fake_apicid_to_node[j] = i; } + + /* + * If there are apicid-to-node mappings for physical nodes that do not + * have a corresponding emulated node, it should default to a guaranteed + * value. + */ + for (i = 0; i < MAX_LOCAL_APIC; i++) + if (apicid_to_node[i] != NUMA_NO_NODE && + fake_apicid_to_node[i] == NUMA_NO_NODE) + fake_apicid_to_node[i] = 0; + for (i = 0; i < num_nodes; i++) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); -- cgit v1.2.3-18-g5258