aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c318
1 files changed, 304 insertions, 14 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 74505b24537..fd481232957 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,15 @@
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
static int numa_enabled = 1;
@@ -163,7 +168,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}
-static void __cpuinit map_cpu_to_node(int cpu, int node)
+static void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -173,7 +178,7 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
static void unmap_cpu_from_node(unsigned long cpu)
{
int node = numa_cpu_lookup_table[cpu];
@@ -181,13 +186,13 @@ static void unmap_cpu_from_node(unsigned long cpu)
dbg("removing cpu %lu from node %d\n", cpu, node);
if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
} else {
printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
cpu, node);
}
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
@@ -246,32 +251,41 @@ static void initialize_distance_lookup_table(int nid,
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
-static int of_node_to_nid_single(struct device_node *device)
+static int associativity_to_nid(const unsigned int *associativity)
{
int nid = -1;
- const unsigned int *tmp;
if (min_common_depth == -1)
goto out;
- tmp = of_get_associativity(device);
- if (!tmp)
- goto out;
-
- if (tmp[0] >= min_common_depth)
- nid = tmp[min_common_depth];
+ if (associativity[0] >= min_common_depth)
+ nid = associativity[min_common_depth];
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = -1;
- if (nid > 0 && tmp[0] >= distance_ref_points_depth)
- initialize_distance_lookup_table(nid, tmp);
+ if (nid > 0 && associativity[0] >= distance_ref_points_depth)
+ initialize_distance_lookup_table(nid, associativity);
out:
return nid;
}
+/* Returns the nid associated with the given device tree node,
+ * or -1 if not found.
+ */
+static int of_node_to_nid_single(struct device_node *device)
+{
+ int nid = -1;
+ const unsigned int *tmp;
+
+ tmp = of_get_associativity(device);
+ if (tmp)
+ nid = associativity_to_nid(tmp);
+ return nid;
+}
+
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
@@ -1247,4 +1261,280 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
return nid;
}
+static u64 hot_add_drconf_memory_max(void)
+{
+ struct device_node *memory = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
+ const u32 *dm = 0;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
+ of_node_put(memory);
+ }
+ return lmb_size * drconf_cell_cnt;
+}
+
+/*
+ * memory_hotplug_max - return max address of memory that may be added
+ *
+ * This is currently only used on systems that support drconfig memory
+ * hotplug.
+ */
+u64 memory_hotplug_max(void)
+{
+ return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
+}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/* Virtual Processor Home Node (VPHN) support */
+#ifdef CONFIG_PPC_SPLPAR
+static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
+static cpumask_t cpu_associativity_changes_mask;
+static int vphn_enabled;
+static void set_topology_timer(void);
+
+/*
+ * Store the current values of the associativity change counters in the
+ * hypervisor.
+ */
+static void setup_cpu_associativity_change_counters(void)
+{
+ int cpu;
+
+ /* The VPHN feature supports a maximum of 8 reference points */
+ BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < distance_ref_points_depth; i++)
+ counts[i] = hypervisor_counts[i];
+ }
+}
+
+/*
+ * The hypervisor maintains a set of 8 associativity change counters in
+ * the VPA of each cpu that correspond to the associativity levels in the
+ * ibm,associativity-reference-points property. When an associativity
+ * level changes, the corresponding counter is incremented.
+ *
+ * Set a bit in cpu_associativity_changes_mask for each cpu whose home
+ * node associativity levels have changed.
+ *
+ * Returns the number of cpus with unhandled associativity changes.
+ */
+static int update_cpu_associativity_changes_mask(void)
+{
+ int cpu, nr_cpus = 0;
+ cpumask_t *changes = &cpu_associativity_changes_mask;
+
+ cpumask_clear(changes);
+
+ for_each_possible_cpu(cpu) {
+ int i, changed = 0;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ if (hypervisor_counts[i] != counts[i]) {
+ counts[i] = hypervisor_counts[i];
+ changed = 1;
+ }
+ }
+ if (changed) {
+ cpumask_set_cpu(cpu, changes);
+ nr_cpus++;
+ }
+ }
+
+ return nr_cpus;
+}
+
+/*
+ * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
+ * the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
+
+/*
+ * Convert the associativity domain numbers returned from the hypervisor
+ * to the sequence they would appear in the ibm,associativity property.
+ */
+static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
+{
+ int i, nr_assoc_doms = 0;
+ const u16 *field = (const u16*) packed;
+
+#define VPHN_FIELD_UNUSED (0xffff)
+#define VPHN_FIELD_MSB (0x8000)
+#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
+
+ for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
+ if (*field == VPHN_FIELD_UNUSED) {
+ /* All significant fields processed, and remaining
+ * fields contain the reserved value of all 1's.
+ * Just store them.
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ } else if (*field & VPHN_FIELD_MSB) {
+ /* Data is in the lower 15 bits of this field */
+ unpacked[i] = *field & VPHN_FIELD_MASK;
+ field++;
+ nr_assoc_doms++;
+ } else {
+ /* Data is in the lower 15 bits of this field
+ * concatenated with the next 16 bit field
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ nr_assoc_doms++;
+ }
+ }
+
+ /* The first cell contains the length of the property */
+ unpacked[0] = nr_assoc_doms;
+
+ return nr_assoc_doms;
+}
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ */
+static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
+{
+ long rc;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ u64 flags = 1;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
+
+static long vphn_get_associativity(unsigned long cpu,
+ unsigned int *associativity)
+{
+ long rc;
+
+ rc = hcall_vphn(cpu, associativity);
+
+ switch (rc) {
+ case H_FUNCTION:
+ printk(KERN_INFO
+ "VPHN is not supported. Disabling polling...\n");
+ stop_topology_update();
+ break;
+ case H_HARDWARE:
+ printk(KERN_ERR
+ "hcall_vphn() experienced a hardware fault "
+ "preventing VPHN. Disabling polling...\n");
+ stop_topology_update();
+ }
+
+ return rc;
+}
+
+/*
+ * Update the node maps and sysfs entries for each cpu whose home node
+ * has changed.
+ */
+int arch_update_cpu_topology(void)
+{
+ int cpu, nid, old_nid;
+ unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ struct sys_device *sysdev;
+
+ for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+ vphn_get_associativity(cpu, associativity);
+ nid = associativity_to_nid(associativity);
+
+ if (nid < 0 || !node_online(nid))
+ nid = first_online_node;
+
+ old_nid = numa_cpu_lookup_table[cpu];
+
+ /* Disable hotplug while we update the cpu
+ * masks and sysfs.
+ */
+ get_online_cpus();
+ unregister_cpu_under_node(cpu, old_nid);
+ unmap_cpu_from_node(cpu);
+ map_cpu_to_node(cpu, nid);
+ register_cpu_under_node(cpu, nid);
+ put_online_cpus();
+
+ sysdev = get_cpu_sysdev(cpu);
+ if (sysdev)
+ kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+ }
+
+ return 1;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ rebuild_sched_domains();
+}
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+ if (!vphn_enabled)
+ return;
+ if (update_cpu_associativity_changes_mask() > 0)
+ topology_schedule_update();
+ set_topology_timer();
+}
+static struct timer_list topology_timer =
+ TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+
+static void set_topology_timer(void)
+{
+ topology_timer.data = 0;
+ topology_timer.expires = jiffies + 60 * HZ;
+ add_timer(&topology_timer);
+}
+
+/*
+ * Start polling for VPHN associativity changes.
+ */
+int start_topology_update(void)
+{
+ int rc = 0;
+
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ get_lppaca()->shared_proc) {
+ vphn_enabled = 1;
+ setup_cpu_associativity_change_counters();
+ init_timer_deferrable(&topology_timer);
+ set_topology_timer();
+ rc = 1;
+ }
+
+ return rc;
+}
+__initcall(start_topology_update);
+
+/*
+ * Disable polling for VPHN associativity changes.
+ */
+int stop_topology_update(void)
+{
+ vphn_enabled = 0;
+ return del_timer_sync(&topology_timer);
+}
+#endif /* CONFIG_PPC_SPLPAR */