diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv/subcore.c')
| -rw-r--r-- | arch/powerpc/platforms/powernv/subcore.c | 392 | 
1 files changed, 392 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c new file mode 100644 index 00000000000..894ecb3eb59 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -0,0 +1,392 @@ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt)	"powernv: " fmt + +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/gfp.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> + +#include <asm/cputhreads.h> +#include <asm/kvm_ppc.h> +#include <asm/machdep.h> +#include <asm/opal.h> +#include <asm/smp.h> + +#include "subcore.h" + + +/* + * Split/unsplit procedure: + * + * A core can be in one of three states, unsplit, 2-way split, and 4-way split. + * + * The mapping to subcores_per_core is simple: + * + *  State       | subcores_per_core + *  ------------|------------------ + *  Unsplit     |        1 + *  2-way split |        2 + *  4-way split |        4 + * + * The core is split along thread boundaries, the mapping between subcores and + * threads is as follows: + * + *  Unsplit: + *          ---------------------------- + *  Subcore |            0             | + *          ---------------------------- + *  Thread  |  0  1  2  3  4  5  6  7  | + *          ---------------------------- + * + *  2-way split: + *          ------------------------------------- + *  Subcore |        0        |        1        | + *          ------------------------------------- + *  Thread  |  0   1   2   3  |  4   5   6   7  | + *          ------------------------------------- + * + *  4-way split: + *          ----------------------------------------- + *  Subcore |    0    |    1    |    2    |    3    | + *          ----------------------------------------- + *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  | + *          ----------------------------------------- + * + * + * Transitions + * ----------- + * + * It is not possible to transition between either of the split states, the + * core must first be unsplit. The legal transitions are: + * + *  -----------          --------------- + *  |         |  <---->  | 2-way split | + *  |         |          --------------- + *  | Unsplit | + *  |         |          --------------- + *  |         |  <---->  | 4-way split | + *  -----------          --------------- + * + * Unsplitting + * ----------- + * + * Unsplitting is the simpler procedure. It requires thread 0 to request the + * unsplit while all other threads NAP. + * + * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells + * the hardware that if all threads except 0 are napping, the hardware should + * unsplit the core. + * + * Non-zero threads are sent to a NAP loop, they don't exit the loop until they + * see the core unsplit. + * + * Core 0 spins waiting for the hardware to see all the other threads napping + * and perform the unsplit. + * + * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them + * out of NAP. They will then see the core unsplit and exit the NAP loop. + * + * Splitting + * --------- + * + * The basic splitting procedure is fairly straight forward. However it is + * complicated by the fact that after the split occurs, the newly created + * subcores are not in a fully initialised state. + * + * Most notably the subcores do not have the correct value for SDR1, which + * means they must not be running in virtual mode when the split occurs. The + * subcores have separate timebases SPRs but these are pre-synchronised by + * opal. + * + * To begin with secondary threads are sent to an assembly routine. There they + * switch to real mode, so they are immune to the uninitialised SDR1 value. + * Once in real mode they indicate that they are in real mode, and spin waiting + * to see the core split. + * + * Thread 0 waits to see that all secondaries are in real mode, and then begins + * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which + * prevents the hardware from unsplitting. Then it sets the appropriate HID bit + * to request the split, and spins waiting to see that the split has happened. + * + * Concurrently the secondaries will notice the split. When they do they set up + * their SPRs, notably SDR1, and then they can return to virtual mode and exit + * the procedure. + */ + +/* Initialised at boot by subcore_init() */ +static int subcores_per_core; + +/* + * Used to communicate to offline cpus that we want them to pop out of the + * offline loop and do a split or unsplit. + * + * 0 - no split happening + * 1 - unsplit in progress + * 2 - split to 2 in progress + * 4 - split to 4 in progress + */ +static int new_split_mode; + +static cpumask_var_t cpu_offline_mask; + +struct split_state { +	u8 step; +	u8 master; +}; + +static DEFINE_PER_CPU(struct split_state, split_state); + +static void wait_for_sync_step(int step) +{ +	int i, cpu = smp_processor_id(); + +	for (i = cpu + 1; i < cpu + threads_per_core; i++) +		while(per_cpu(split_state, i).step < step) +			barrier(); + +	/* Order the wait loop vs any subsequent loads/stores. */ +	mb(); +} + +static void unsplit_core(void) +{ +	u64 hid0, mask; +	int i, cpu; + +	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; + +	cpu = smp_processor_id(); +	if (cpu_thread_in_core(cpu) != 0) { +		while (mfspr(SPRN_HID0) & mask) +			power7_nap(0); + +		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; +		return; +	} + +	hid0 = mfspr(SPRN_HID0); +	hid0 &= ~HID0_POWER8_DYNLPARDIS; +	mtspr(SPRN_HID0, hid0); + +	while (mfspr(SPRN_HID0) & mask) +		cpu_relax(); + +	/* Wake secondaries out of NAP */ +	for (i = cpu + 1; i < cpu + threads_per_core; i++) +		smp_send_reschedule(i); + +	wait_for_sync_step(SYNC_STEP_UNSPLIT); +} + +static void split_core(int new_mode) +{ +	struct {  u64 value; u64 mask; } split_parms[2] = { +		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, +		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } +	}; +	int i, cpu; +	u64 hid0; + +	/* Convert new_mode (2 or 4) into an index into our parms array */ +	i = (new_mode >> 1) - 1; +	BUG_ON(i < 0 || i > 1); + +	cpu = smp_processor_id(); +	if (cpu_thread_in_core(cpu) != 0) { +		split_core_secondary_loop(&per_cpu(split_state, cpu).step); +		return; +	} + +	wait_for_sync_step(SYNC_STEP_REAL_MODE); + +	/* Write new mode */ +	hid0  = mfspr(SPRN_HID0); +	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; +	mtspr(SPRN_HID0, hid0); + +	/* Wait for it to happen */ +	while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) +		cpu_relax(); +} + +static void cpu_do_split(int new_mode) +{ +	/* +	 * At boot subcores_per_core will be 0, so we will always unsplit at +	 * boot. In the usual case where the core is already unsplit it's a +	 * nop, and this just ensures the kernel's notion of the mode is +	 * consistent with the hardware. +	 */ +	if (subcores_per_core != 1) +		unsplit_core(); + +	if (new_mode != 1) +		split_core(new_mode); + +	mb(); +	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED; +} + +bool cpu_core_split_required(void) +{ +	smp_rmb(); + +	if (!new_split_mode) +		return false; + +	cpu_do_split(new_split_mode); + +	return true; +} + +static int cpu_update_split_mode(void *data) +{ +	int cpu, new_mode = *(int *)data; + +	if (this_cpu_ptr(&split_state)->master) { +		new_split_mode = new_mode; +		smp_wmb(); + +		cpumask_andnot(cpu_offline_mask, cpu_present_mask, +			       cpu_online_mask); + +		/* This should work even though the cpu is offline */ +		for_each_cpu(cpu, cpu_offline_mask) +			smp_send_reschedule(cpu); +	} + +	cpu_do_split(new_mode); + +	if (this_cpu_ptr(&split_state)->master) { +		/* Wait for all cpus to finish before we touch subcores_per_core */ +		for_each_present_cpu(cpu) { +			if (cpu >= setup_max_cpus) +				break; + +			while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED) +				barrier(); +		} + +		new_split_mode = 0; + +		/* Make the new mode public */ +		subcores_per_core = new_mode; +		threads_per_subcore = threads_per_core / subcores_per_core; + +		/* Make sure the new mode is written before we exit */ +		mb(); +	} + +	return 0; +} + +static int set_subcores_per_core(int new_mode) +{ +	struct split_state *state; +	int cpu; + +	if (kvm_hv_mode_active()) { +		pr_err("Unable to change split core mode while KVM active.\n"); +		return -EBUSY; +	} + +	/* +	 * We are only called at boot, or from the sysfs write. If that ever +	 * changes we'll need a lock here. +	 */ +	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3); + +	for_each_present_cpu(cpu) { +		state = &per_cpu(split_state, cpu); +		state->step = SYNC_STEP_INITIAL; +		state->master = 0; +	} + +	get_online_cpus(); + +	/* This cpu will update the globals before exiting stop machine */ +	this_cpu_ptr(&split_state)->master = 1; + +	/* Ensure state is consistent before we call the other cpus */ +	mb(); + +	stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask); + +	put_online_cpus(); + +	return 0; +} + +static ssize_t __used store_subcores_per_core(struct device *dev, +		struct device_attribute *attr, const char *buf, +		size_t count) +{ +	unsigned long val; +	int rc; + +	/* We are serialised by the attribute lock */ + +	rc = sscanf(buf, "%lx", &val); +	if (rc != 1) +		return -EINVAL; + +	switch (val) { +	case 1: +	case 2: +	case 4: +		if (subcores_per_core == val) +			/* Nothing to do */ +			goto out; +		break; +	default: +		return -EINVAL; +	} + +	rc = set_subcores_per_core(val); +	if (rc) +		return rc; + +out: +	return count; +} + +static ssize_t show_subcores_per_core(struct device *dev, +		struct device_attribute *attr, char *buf) +{ +	return sprintf(buf, "%x\n", subcores_per_core); +} + +static DEVICE_ATTR(subcores_per_core, 0644, +		show_subcores_per_core, store_subcores_per_core); + +static int subcore_init(void) +{ +	if (!cpu_has_feature(CPU_FTR_ARCH_207S)) +		return 0; + +	/* +	 * We need all threads in a core to be present to split/unsplit so +         * continue only if max_cpus are aligned to threads_per_core. +	 */ +	if (setup_max_cpus % threads_per_core) +		return 0; + +	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL)); + +	set_subcores_per_core(1); + +	return device_create_file(cpu_subsys.dev_root, +				  &dev_attr_subcores_per_core); +} +machine_device_initcall(powernv, subcore_init);  | 
