Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/gup.c                 |  12
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c       |   2
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c  |  12
-rw-r--r--  arch/powerpc/mm/numa.c                | 311
-rw-r--r--  arch/powerpc/mm/pgtable_32.c          |   3
-rw-r--r--  arch/powerpc/mm/pgtable_64.c          |   2
6 files changed, 321 insertions, 21 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf640c..fec13200868 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,6 +16,16 @@
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
+static inline void get_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -47,6 +57,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			put_page(page);
 			return 0;
 		}
+		if (PageTail(page))
+			get_huge_page_tail(page);
 		pages[*nr] = page;
 		(*nr)++;
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5e9584405c4..a5991facddc 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1070,7 +1070,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 		  unsigned long access, unsigned long trap)
 {
 	unsigned long vsid;
-	void *pgdir;
+	pgd_t *pgdir;
 	pte_t *ptep;
 	unsigned long flags;
 	int rc, ssize, local = 0;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 5ce99848d91..c0aab52da3a 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -111,8 +111,8 @@ static unsigned int steal_context_smp(unsigned int id)
 		 * a core map instead but this will do for now.
 		 */
 		for_each_cpu(cpu, mm_cpumask(mm)) {
-			for (i = cpu_first_thread_in_core(cpu);
-			     i <= cpu_last_thread_in_core(cpu); i++)
+			for (i = cpu_first_thread_sibling(cpu);
+			     i <= cpu_last_thread_sibling(cpu); i++)
 				__set_bit(id, stale_map[i]);
 			cpu = i - 1;
 		}
@@ -264,14 +264,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	 */
 	if (test_bit(id, stale_map[cpu])) {
 		pr_hardcont(" | stale flush %d [%d..%d]",
-			    id, cpu_first_thread_in_core(cpu),
-			    cpu_last_thread_in_core(cpu));
+			    id, cpu_first_thread_sibling(cpu),
+			    cpu_last_thread_sibling(cpu));
 
 		local_flush_tlb_mm(next);
 
 		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
-		for (i = cpu_first_thread_in_core(cpu);
-		     i <= cpu_last_thread_in_core(cpu); i++) {
+		for (i = cpu_first_thread_sibling(cpu);
+		     i <= cpu_last_thread_sibling(cpu); i++) {
 			__clear_bit(id, stale_map[i]);
 		}
 	}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 74505b24537..bf5cb91f07d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,15 @@
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/system.h>
 #include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
 
 static int numa_enabled = 1;
 
@@ -163,7 +168,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
 	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
 }
 
-static void __cpuinit map_cpu_to_node(int cpu, int node)
+static void map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
 
@@ -173,7 +178,7 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
 	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
 static void unmap_cpu_from_node(unsigned long cpu)
 {
 	int node = numa_cpu_lookup_table[cpu];
@@ -187,7 +192,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
 			cpu, node);
 	}
 }
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
 
 /* must hold reference to node during call */
 static const int *of_get_associativity(struct device_node *dev)
@@ -246,32 +251,41 @@ static void initialize_distance_lookup_table(int nid,
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
  * info is found.
  */
-static int of_node_to_nid_single(struct device_node *device)
+static int associativity_to_nid(const unsigned int *associativity)
 {
 	int nid = -1;
-	const unsigned int *tmp;
 
 	if (min_common_depth == -1)
 		goto out;
 
-	tmp = of_get_associativity(device);
-	if (!tmp)
-		goto out;
-
-	if (tmp[0] >= min_common_depth)
-		nid = tmp[min_common_depth];
+	if (associativity[0] >= min_common_depth)
+		nid = associativity[min_common_depth];
 
 	/* POWER4 LPAR uses 0xffff as invalid node */
 	if (nid == 0xffff || nid >= MAX_NUMNODES)
 		nid = -1;
 
-	if (nid > 0 && tmp[0] >= distance_ref_points_depth)
-		initialize_distance_lookup_table(nid, tmp);
+	if (nid > 0 && associativity[0] >= distance_ref_points_depth)
+		initialize_distance_lookup_table(nid, associativity);
 
 out:
 	return nid;
 }
 
+/* Returns the nid associated with the given device tree node,
+ * or -1 if not found.
+ */
+static int of_node_to_nid_single(struct device_node *device)
+{
+	int nid = -1;
+	const unsigned int *tmp;
+
+	tmp = of_get_associativity(device);
+	if (tmp)
+		nid = associativity_to_nid(tmp);
+	return nid;
+}
+
 /* Walk the device tree upwards, looking for an associativity id */
 int of_node_to_nid(struct device_node *device)
 {
@@ -1247,4 +1261,275 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 
 	return nid;
 }
+
+static u64 hot_add_drconf_memory_max(void)
+{
+	struct device_node *memory = NULL;
+	unsigned int drconf_cell_cnt = 0;
+	u64 lmb_size = 0;
+	const u32 *dm = 0;
+
+	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (memory) {
+		drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+		lmb_size = of_get_lmb_size(memory);
+		of_node_put(memory);
+	}
+	return lmb_size * drconf_cell_cnt;
+}
+
+/*
+ * memory_hotplug_max - return max address of memory that may be added
+ *
+ * This is currently only used on systems that support drconfig memory
+ * hotplug.
+ */
+u64 memory_hotplug_max(void)
+{
+	return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
+}
 #endif /* CONFIG_MEMORY_HOTPLUG */
+
+/* Vrtual Processor Home Node (VPHN) support */
+#ifdef CONFIG_PPC_SPLPAR
+#define VPHN_NR_CHANGE_CTRS (8)
+static u8 vphn_cpu_change_counts[NR_CPUS][VPHN_NR_CHANGE_CTRS];
+static cpumask_t cpu_associativity_changes_mask;
+static int vphn_enabled;
+static void set_topology_timer(void);
+
+/*
+ * Store the current values of the associativity change counters in the
+ * hypervisor.
+ */
+static void setup_cpu_associativity_change_counters(void)
+{
+	int cpu = 0;
+
+	for_each_possible_cpu(cpu) {
+		int i = 0;
+		u8 *counts = vphn_cpu_change_counts[cpu];
+		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+		for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
+			counts[i] = hypervisor_counts[i];
+		}
+	}
+}
+
+/*
+ * The hypervisor maintains a set of 8 associativity change counters in
+ * the VPA of each cpu that correspond to the associativity levels in the
+ * ibm,associativity-reference-points property. When an associativity
+ * level changes, the corresponding counter is incremented.
+ *
+ * Set a bit in cpu_associativity_changes_mask for each cpu whose home
+ * node associativity levels have changed.
+ *
+ * Returns the number of cpus with unhandled associativity changes.
+ */
+static int update_cpu_associativity_changes_mask(void)
+{
+	int cpu = 0, nr_cpus = 0;
+	cpumask_t *changes = &cpu_associativity_changes_mask;
+
+	cpumask_clear(changes);
+
+	for_each_possible_cpu(cpu) {
+		int i, changed = 0;
+		u8 *counts = vphn_cpu_change_counts[cpu];
+		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+		for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
+			if (hypervisor_counts[i] > counts[i]) {
+				counts[i] = hypervisor_counts[i];
+				changed = 1;
+			}
+		}
+		if (changed) {
+			cpumask_set_cpu(cpu, changes);
+			nr_cpus++;
+		}
+	}
+
+	return nr_cpus;
+}
+
+/* 6 64-bit registers unpacked into 12 32-bit associativity values */
+#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32))
+
+/*
+ * Convert the associativity domain numbers returned from the hypervisor
+ * to the sequence they would appear in the ibm,associativity property.
+ */
+static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
+{
+	int i = 0;
+	int nr_assoc_doms = 0;
+	const u16 *field = (const u16*) packed;
+
+#define VPHN_FIELD_UNUSED	(0xffff)
+#define VPHN_FIELD_MSB		(0x8000)
+#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)
+
+	for (i = 0; i < VPHN_ASSOC_BUFSIZE; i++) {
+		if (*field == VPHN_FIELD_UNUSED) {
+			/* All significant fields processed, and remaining
+			 * fields contain the reserved value of all 1's.
+			 * Just store them.
+			 */
+			unpacked[i] = *((u32*)field);
+			field += 2;
+		}
+		else if (*field & VPHN_FIELD_MSB) {
+			/* Data is in the lower 15 bits of this field */
+			unpacked[i] = *field & VPHN_FIELD_MASK;
+			field++;
+			nr_assoc_doms++;
+		}
+		else {
+			/* Data is in the lower 15 bits of this field
+			 * concatenated with the next 16 bit field
+			 */
+			unpacked[i] = *((u32*)field);
+			field += 2;
+			nr_assoc_doms++;
+		}
+	}
+
+	return nr_assoc_doms;
+}
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ */
+static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
+{
+	long rc = 0;
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	u64 flags = 1;
+	int hwcpu = get_hard_smp_processor_id(cpu);
+
+	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+	vphn_unpack_associativity(retbuf, associativity);
+
+	return rc;
+}
+
+static long vphn_get_associativity(unsigned long cpu,
+					unsigned int *associativity)
+{
+	long rc = 0;
+
+	rc = hcall_vphn(cpu, associativity);
+
+	switch (rc) {
+	case H_FUNCTION:
+		printk(KERN_INFO
+			"VPHN is not supported. Disabling polling...\n");
+		stop_topology_update();
+		break;
+	case H_HARDWARE:
+		printk(KERN_ERR
+			"hcall_vphn() experienced a hardware fault "
+			"preventing VPHN. Disabling polling...\n");
+		stop_topology_update();
+	}
+
+	return rc;
+}
+
+/*
+ * Update the node maps and sysfs entries for each cpu whose home node
+ * has changed.
+ */
+int arch_update_cpu_topology(void)
+{
+	int cpu = 0, nid = 0, old_nid = 0;
+	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	struct sys_device *sysdev = NULL;
+
+	for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+		vphn_get_associativity(cpu, associativity);
+		nid = associativity_to_nid(associativity);
+
+		if (nid < 0 || !node_online(nid))
+			nid = first_online_node;
+
+		old_nid = numa_cpu_lookup_table[cpu];
+
+		/* Disable hotplug while we update the cpu
+		 * masks and sysfs.
+		 */
+		get_online_cpus();
+		unregister_cpu_under_node(cpu, old_nid);
+		unmap_cpu_from_node(cpu);
+		map_cpu_to_node(cpu, nid);
+		register_cpu_under_node(cpu, nid);
+		put_online_cpus();
+
+		sysdev = get_cpu_sysdev(cpu);
+		if (sysdev)
+			kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+	}
+
+	return 1;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+	rebuild_sched_domains();
+}
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+	if (!vphn_enabled)
+		return;
+	if (update_cpu_associativity_changes_mask() > 0)
+		topology_schedule_update();
+	set_topology_timer();
+}
+static struct timer_list topology_timer =
+	TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+
+static void set_topology_timer(void)
+{
+	topology_timer.data = 0;
+	topology_timer.expires = jiffies + 60 * HZ;
+	add_timer(&topology_timer);
+}
+
+/*
+ * Start polling for VPHN associativity changes.
+ */
+int start_topology_update(void)
+{
+	int rc = 0;
+
+	if (firmware_has_feature(FW_FEATURE_VPHN)) {
+		vphn_enabled = 1;
+		setup_cpu_associativity_change_counters();
+		init_timer_deferrable(&topology_timer);
+		set_topology_timer();
+		rc = 1;
+	}
+
+	return rc;
+}
+__initcall(start_topology_update);
+
+/*
+ * Disable polling for VPHN associativity changes.
+ */
+int stop_topology_update(void)
+{
+	vphn_enabled = 0;
+	return del_timer_sync(&topology_timer);
+}
+#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a87ead0138b..8dc41c0157f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -78,7 +78,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	/* pgdir take page or two with 4K pages and a page fraction otherwise */
 #ifndef CONFIG_PPC_4K_PAGES
-	ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
+	ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
 #else
 	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
 			PGDIR_ORDER - PAGE_SHIFT);
@@ -230,6 +230,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 		area = get_vm_area_caller(size, VM_IOREMAP, caller);
 		if (area == 0)
 			return NULL;
+		area->phys_addr = p;
 		v = (unsigned long) area->addr;
 	} else {
 		v = (ioremap_bot -= size);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 21d6dfab794..88927a05cdc 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -223,6 +223,8 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
 				  caller);
 		if (area == NULL)
 			return NULL;
+
+		area->phys_addr = paligned;
 		ret = __ioremap_at(paligned, area->addr, size, flags);
 		if (!ret)
 			vunmap(area->addr);
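
The vphn_unpack_associativity() routine added by the numa.c hunk above decodes the six 64-bit registers returned by the H_HOME_NODE_ASSOCIATIVITY hcall as a stream of 16-bit fields: a field with its top bit set carries one 15-bit domain number, a field with the top bit clear is concatenated with the following field into a 32-bit domain number, and 0xffff marks the unused remainder. The stand-alone, user-space sketch below is illustrative only and is not part of the patch; its names and sample values are invented, and the explicit shift-and-or stands in for the u32 load the kernel code performs on big-endian POWER.

/* vphn_unpack_demo.c - illustrative only; mirrors the decoding rules of
 * vphn_unpack_associativity() above, not the kernel implementation itself.
 */
#include <stdio.h>
#include <stdint.h>

#define VPHN_NR_FIELDS		24	/* 6 x 64-bit registers = 24 x 16-bit fields */
#define VPHN_NR_UNPACKED	12	/* 12 x 32-bit associativity values */
#define VPHN_FIELD_UNUSED	0xffffu	/* reserved "all ones" field */
#define VPHN_FIELD_MSB		0x8000u	/* set: field holds one 15-bit domain number */

static int vphn_unpack_demo(const uint16_t *field, uint32_t *unpacked)
{
	int i, nr_assoc_doms = 0;

	for (i = 0; i < VPHN_NR_UNPACKED; i++) {
		if (field[0] == VPHN_FIELD_UNUSED) {
			/* reserved field: keep it as all ones */
			unpacked[i] = 0xffffffffu;
			field += 2;
		} else if (field[0] & VPHN_FIELD_MSB) {
			/* one domain number in the low 15 bits */
			unpacked[i] = field[0] & ~VPHN_FIELD_MSB;
			field += 1;
			nr_assoc_doms++;
		} else {
			/* two fields concatenated into one 32-bit value; the
			 * kernel does this with a u32 load, which is the same
			 * thing on big-endian POWER */
			unpacked[i] = ((uint32_t)field[0] << 16) | field[1];
			field += 2;
			nr_assoc_doms++;
		}
	}
	return nr_assoc_doms;
}

int main(void)
{
	/* made-up field stream exercising all three cases */
	const uint16_t fields[VPHN_NR_FIELDS] = {
		0x8002,			/* 15-bit domain number 2 */
		0x0001, 0x0005,		/* 32-bit domain number 0x00010005 */
		0x8000,			/* 15-bit domain number 0 */
		0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
	};
	uint32_t unpacked[VPHN_NR_UNPACKED];
	int i, doms;

	doms = vphn_unpack_demo(fields, unpacked);
	printf("%d associativity domains\n", doms);
	for (i = 0; i < VPHN_NR_UNPACKED; i++)
		printf("  unpacked[%d] = 0x%08x\n", i, (unsigned int)unpacked[i]);
	return 0;
}

Built with any C compiler, the sample stream yields 3 associativity domains, matching the count the kernel routine would return for the same field sequence.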