diff options
Diffstat (limited to 'drivers/powercap')
| -rw-r--r-- | drivers/powercap/Kconfig | 32 | ||||
| -rw-r--r-- | drivers/powercap/Makefile | 2 | ||||
| -rw-r--r-- | drivers/powercap/intel_rapl.c | 1406 | ||||
| -rw-r--r-- | drivers/powercap/powercap_sys.c | 690 | 
4 files changed, 2130 insertions, 0 deletions
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig new file mode 100644 index 00000000000..a7c81b53d88 --- /dev/null +++ b/drivers/powercap/Kconfig @@ -0,0 +1,32 @@ +# +# Generic power capping sysfs interface configuration +# + +menuconfig POWERCAP +	bool "Generic powercap sysfs driver" +	help +	  The power capping sysfs interface allows kernel subsystems to expose power +	  capping settings to user space in a consistent way.  Usually, it consists +	  of multiple control types that determine which settings may be exposed and +	  power zones representing parts of the system that can be subject to power +	  capping. + +	  If you want this code to be compiled in, say Y here. + +if POWERCAP +# Client driver configurations go here. +config INTEL_RAPL +	tristate "Intel RAPL Support" +	depends on X86 +	default n +	---help--- +	  This enables support for the Intel Running Average Power Limit (RAPL) +	  technology which allows power limits to be enforced and monitored on +	  modern Intel processors (Sandy Bridge and later). + +	  In RAPL, the platform level settings are divided into domains for +	  fine grained control. These domains include processor package, DRAM +	  controller, CPU core (Power Plane 0), graphics uncore (Power Plane +	  1), etc. + +endif diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile new file mode 100644 index 00000000000..0a21ef31372 --- /dev/null +++ b/drivers/powercap/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_POWERCAP)	+= powercap_sys.o +obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c new file mode 100644 index 00000000000..b1cda6ffdbc --- /dev/null +++ b/drivers/powercap/intel_rapl.c @@ -0,0 +1,1406 @@ +/* + * Intel Running Average Power Limit (RAPL) Driver + * Copyright (c) 2013, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/log2.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/sysfs.h> +#include <linux/cpu.h> +#include <linux/powercap.h> + +#include <asm/processor.h> +#include <asm/cpu_device_id.h> + +/* bitmasks for RAPL MSRs, used by primitive access functions */ +#define ENERGY_STATUS_MASK      0xffffffff + +#define POWER_LIMIT1_MASK       0x7FFF +#define POWER_LIMIT1_ENABLE     BIT(15) +#define POWER_LIMIT1_CLAMP      BIT(16) + +#define POWER_LIMIT2_MASK       (0x7FFFULL<<32) +#define POWER_LIMIT2_ENABLE     BIT_ULL(47) +#define POWER_LIMIT2_CLAMP      BIT_ULL(48) +#define POWER_PACKAGE_LOCK      BIT_ULL(63) +#define POWER_PP_LOCK           BIT(31) + +#define TIME_WINDOW1_MASK       (0x7FULL<<17) +#define TIME_WINDOW2_MASK       (0x7FULL<<49) + +#define POWER_UNIT_OFFSET	0 +#define POWER_UNIT_MASK		0x0F + +#define ENERGY_UNIT_OFFSET	0x08 +#define ENERGY_UNIT_MASK	0x1F00 + +#define TIME_UNIT_OFFSET	0x10 +#define TIME_UNIT_MASK		0xF0000 + +#define POWER_INFO_MAX_MASK     (0x7fffULL<<32) +#define POWER_INFO_MIN_MASK     (0x7fffULL<<16) +#define POWER_INFO_MAX_TIME_WIN_MASK     (0x3fULL<<48) +#define POWER_INFO_THERMAL_SPEC_MASK     
0x7fff + +#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff +#define PP_POLICY_MASK         0x1F + +/* Non HW constants */ +#define RAPL_PRIMITIVE_DERIVED       BIT(1) /* not from raw data */ +#define RAPL_PRIMITIVE_DUMMY         BIT(2) + +/* scale RAPL units to avoid floating point math inside kernel */ +#define POWER_UNIT_SCALE     (1000000) +#define ENERGY_UNIT_SCALE    (1000000) +#define TIME_UNIT_SCALE      (1000000) + +#define TIME_WINDOW_MAX_MSEC 40000 +#define TIME_WINDOW_MIN_MSEC 250 + +enum unit_type { +	ARBITRARY_UNIT, /* no translation */ +	POWER_UNIT, +	ENERGY_UNIT, +	TIME_UNIT, +}; + +enum rapl_domain_type { +	RAPL_DOMAIN_PACKAGE, /* entire package/socket */ +	RAPL_DOMAIN_PP0, /* core power plane */ +	RAPL_DOMAIN_PP1, /* graphics uncore */ +	RAPL_DOMAIN_DRAM,/* DRAM control_type */ +	RAPL_DOMAIN_MAX, +}; + +enum rapl_domain_msr_id { +	RAPL_DOMAIN_MSR_LIMIT, +	RAPL_DOMAIN_MSR_STATUS, +	RAPL_DOMAIN_MSR_PERF, +	RAPL_DOMAIN_MSR_POLICY, +	RAPL_DOMAIN_MSR_INFO, +	RAPL_DOMAIN_MSR_MAX, +}; + +/* per domain data, some are optional */ +enum rapl_primitives { +	ENERGY_COUNTER, +	POWER_LIMIT1, +	POWER_LIMIT2, +	FW_LOCK, + +	PL1_ENABLE,  /* power limit 1, aka long term */ +	PL1_CLAMP,   /* allow frequency to go below OS request */ +	PL2_ENABLE,  /* power limit 2, aka short term, instantaneous */ +	PL2_CLAMP, + +	TIME_WINDOW1, /* long term */ +	TIME_WINDOW2, /* short term */ +	THERMAL_SPEC_POWER, +	MAX_POWER, + +	MIN_POWER, +	MAX_TIME_WINDOW, +	THROTTLED_TIME, +	PRIORITY_LEVEL, + +	/* below are not raw primitive data */ +	AVERAGE_POWER, +	NR_RAPL_PRIMITIVES, +}; + +#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) + +/* Can be expanded to include events, etc.*/ +struct rapl_domain_data { +	u64 primitives[NR_RAPL_PRIMITIVES]; +	unsigned long timestamp; +}; + + +#define	DOMAIN_STATE_INACTIVE           BIT(0) +#define	DOMAIN_STATE_POWER_LIMIT_SET    BIT(1) +#define DOMAIN_STATE_BIOS_LOCKED        BIT(2) + +#define NR_POWER_LIMITS (2) +struct rapl_power_limit { +	
struct powercap_zone_constraint *constraint; +	int prim_id; /* primitive ID used to enable */ +	struct rapl_domain *domain; +	const char *name; +}; + +static const char pl1_name[] = "long_term"; +static const char pl2_name[] = "short_term"; + +struct rapl_domain { +	const char *name; +	enum rapl_domain_type id; +	int msrs[RAPL_DOMAIN_MSR_MAX]; +	struct powercap_zone power_zone; +	struct rapl_domain_data rdd; +	struct rapl_power_limit rpl[NR_POWER_LIMITS]; +	u64 attr_map; /* track capabilities */ +	unsigned int state; +	int package_id; +}; +#define power_zone_to_rapl_domain(_zone) \ +	container_of(_zone, struct rapl_domain, power_zone) + + +/* Each physical package contains multiple domains, these are the common + * data across RAPL domains within a package. + */ +struct rapl_package { +	unsigned int id; /* physical package/socket id */ +	unsigned int nr_domains; +	unsigned long domain_map; /* bit map of active domains */ +	unsigned int power_unit_divisor; +	unsigned int energy_unit_divisor; +	unsigned int time_unit_divisor; +	struct rapl_domain *domains; /* array of domains, sized at runtime */ +	struct powercap_zone *power_zone; /* keep track of parent zone */ +	int nr_cpus; /* active cpus on the package, topology info is lost during +		      * cpu hotplug. so we have to track ourselves. +		      */ +	unsigned long power_limit_irq; /* keep track of package power limit +					* notify interrupt enable status. +					*/ +	struct list_head plist; +}; +#define PACKAGE_PLN_INT_SAVED   BIT(0) +#define MAX_PRIM_NAME (32) + +/* per domain data. used to describe individual knobs such that access function + * can be consolidated into one instead of many inline functions. 
+ */ +struct rapl_primitive_info { +	const char *name; +	u64 mask; +	int shift; +	enum rapl_domain_msr_id id; +	enum unit_type unit; +	u32 flag; +}; + +#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) {	\ +		.name = #p,			\ +		.mask = m,			\ +		.shift = s,			\ +		.id = i,			\ +		.unit = u,			\ +		.flag = f			\ +	} + +static void rapl_init_domains(struct rapl_package *rp); +static int rapl_read_data_raw(struct rapl_domain *rd, +			enum rapl_primitives prim, +			bool xlate, u64 *data); +static int rapl_write_data_raw(struct rapl_domain *rd, +			enum rapl_primitives prim, +			unsigned long long value); +static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, +			int to_raw); +static void package_power_limit_irq_save(int package_id); + +static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ + +static const char * const rapl_domain_names[] = { +	"package", +	"core", +	"uncore", +	"dram", +}; + +static struct powercap_control_type *control_type; /* PowerCap Controller */ + +/* caller to ensure CPU hotplug lock is held */ +static struct rapl_package *find_package_by_id(int id) +{ +	struct rapl_package *rp; + +	list_for_each_entry(rp, &rapl_packages, plist) { +		if (rp->id == id) +			return rp; +	} + +	return NULL; +} + +/* caller to ensure CPU hotplug lock is held */ +static int find_active_cpu_on_package(int package_id) +{ +	int i; + +	for_each_online_cpu(i) { +		if (topology_physical_package_id(i) == package_id) +			return i; +	} +	/* all CPUs on this package are offline */ + +	return -ENODEV; +} + +/* caller must hold cpu hotplug lock */ +static void rapl_cleanup_data(void) +{ +	struct rapl_package *p, *tmp; + +	list_for_each_entry_safe(p, tmp, &rapl_packages, plist) { +		kfree(p->domains); +		list_del(&p->plist); +		kfree(p); +	} +} + +static int get_energy_counter(struct powercap_zone *power_zone, u64 *energy_raw) +{ +	struct rapl_domain *rd; +	u64 energy_now; + +	/* prevent CPU hotplug, make sure the RAPL domain does not go +	 * away while 
reading the counter. +	 */ +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); + +	if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) { +		*energy_raw = energy_now; +		put_online_cpus(); + +		return 0; +	} +	put_online_cpus(); + +	return -EIO; +} + +static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) +{ +	*energy = rapl_unit_xlate(0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); +	return 0; +} + +static int release_zone(struct powercap_zone *power_zone) +{ +	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); +	struct rapl_package *rp; + +	/* package zone is the last zone of a package, we can free +	 * memory here since all children has been unregistered. +	 */ +	if (rd->id == RAPL_DOMAIN_PACKAGE) { +		rp = find_package_by_id(rd->package_id); +		if (!rp) { +			dev_warn(&power_zone->dev, "no package id %s\n", +				rd->name); +			return -ENODEV; +		} +		kfree(rd); +		rp->domains = NULL; +	} + +	return 0; + +} + +static int find_nr_power_limit(struct rapl_domain *rd) +{ +	int i; + +	for (i = 0; i < NR_POWER_LIMITS; i++) { +		if (rd->rpl[i].name == NULL) +			break; +	} + +	return i; +} + +static int set_domain_enable(struct powercap_zone *power_zone, bool mode) +{ +	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); +	int nr_powerlimit; + +	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) +		return -EACCES; +	get_online_cpus(); +	nr_powerlimit = find_nr_power_limit(rd); +	/* here we activate/deactivate the hardware for power limiting */ +	rapl_write_data_raw(rd, PL1_ENABLE, mode); +	/* always enable clamp such that p-state can go below OS requested +	 * range. power capping priority over guranteed frequency. 
+	 */ +	rapl_write_data_raw(rd, PL1_CLAMP, mode); +	/* some domains have pl2 */ +	if (nr_powerlimit > 1) { +		rapl_write_data_raw(rd, PL2_ENABLE, mode); +		rapl_write_data_raw(rd, PL2_CLAMP, mode); +	} +	put_online_cpus(); + +	return 0; +} + +static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) +{ +	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); +	u64 val; + +	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { +		*mode = false; +		return 0; +	} +	get_online_cpus(); +	if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) { +		put_online_cpus(); +		return -EIO; +	} +	*mode = val; +	put_online_cpus(); + +	return 0; +} + +/* per RAPL domain ops, in the order of rapl_domain_type */ +static struct powercap_zone_ops zone_ops[] = { +	/* RAPL_DOMAIN_PACKAGE */ +	{ +		.get_energy_uj = get_energy_counter, +		.get_max_energy_range_uj = get_max_energy_counter, +		.release = release_zone, +		.set_enable = set_domain_enable, +		.get_enable = get_domain_enable, +	}, +	/* RAPL_DOMAIN_PP0 */ +	{ +		.get_energy_uj = get_energy_counter, +		.get_max_energy_range_uj = get_max_energy_counter, +		.release = release_zone, +		.set_enable = set_domain_enable, +		.get_enable = get_domain_enable, +	}, +	/* RAPL_DOMAIN_PP1 */ +	{ +		.get_energy_uj = get_energy_counter, +		.get_max_energy_range_uj = get_max_energy_counter, +		.release = release_zone, +		.set_enable = set_domain_enable, +		.get_enable = get_domain_enable, +	}, +	/* RAPL_DOMAIN_DRAM */ +	{ +		.get_energy_uj = get_energy_counter, +		.get_max_energy_range_uj = get_max_energy_counter, +		.release = release_zone, +		.set_enable = set_domain_enable, +		.get_enable = get_domain_enable, +	}, +}; + +static int set_power_limit(struct powercap_zone *power_zone, int id, +			u64 power_limit) +{ +	struct rapl_domain *rd; +	struct rapl_package *rp; +	int ret = 0; + +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); +	rp = find_package_by_id(rd->package_id); +	if (!rp) { +		ret = -ENODEV; +		goto 
set_exit; +	} + +	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { +		dev_warn(&power_zone->dev, "%s locked by BIOS, monitoring only\n", +			rd->name); +		ret = -EACCES; +		goto set_exit; +	} + +	switch (rd->rpl[id].prim_id) { +	case PL1_ENABLE: +		rapl_write_data_raw(rd, POWER_LIMIT1, power_limit); +		break; +	case PL2_ENABLE: +		rapl_write_data_raw(rd, POWER_LIMIT2, power_limit); +		break; +	default: +		ret = -EINVAL; +	} +	if (!ret) +		package_power_limit_irq_save(rd->package_id); +set_exit: +	put_online_cpus(); +	return ret; +} + +static int get_current_power_limit(struct powercap_zone *power_zone, int id, +					u64 *data) +{ +	struct rapl_domain *rd; +	u64 val; +	int prim; +	int ret = 0; + +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); +	switch (rd->rpl[id].prim_id) { +	case PL1_ENABLE: +		prim = POWER_LIMIT1; +		break; +	case PL2_ENABLE: +		prim = POWER_LIMIT2; +		break; +	default: +		put_online_cpus(); +		return -EINVAL; +	} +	if (rapl_read_data_raw(rd, prim, true, &val)) +		ret = -EIO; +	else +		*data = val; + +	put_online_cpus(); + +	return ret; +} + +static int set_time_window(struct powercap_zone *power_zone, int id, +								u64 window) +{ +	struct rapl_domain *rd; +	int ret = 0; + +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); +	switch (rd->rpl[id].prim_id) { +	case PL1_ENABLE: +		rapl_write_data_raw(rd, TIME_WINDOW1, window); +		break; +	case PL2_ENABLE: +		rapl_write_data_raw(rd, TIME_WINDOW2, window); +		break; +	default: +		ret = -EINVAL; +	} +	put_online_cpus(); +	return ret; +} + +static int get_time_window(struct powercap_zone *power_zone, int id, u64 *data) +{ +	struct rapl_domain *rd; +	u64 val; +	int ret = 0; + +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); +	switch (rd->rpl[id].prim_id) { +	case PL1_ENABLE: +		ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val); +		break; +	case PL2_ENABLE: +		ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val); +		break; +	default: +		
put_online_cpus(); +		return -EINVAL; +	} +	if (!ret) +		*data = val; +	put_online_cpus(); + +	return ret; +} + +static const char *get_constraint_name(struct powercap_zone *power_zone, int id) +{ +	struct rapl_power_limit *rpl; +	struct rapl_domain *rd; + +	rd = power_zone_to_rapl_domain(power_zone); +	rpl = (struct rapl_power_limit *) &rd->rpl[id]; + +	return rpl->name; +} + + +static int get_max_power(struct powercap_zone *power_zone, int id, +					u64 *data) +{ +	struct rapl_domain *rd; +	u64 val; +	int prim; +	int ret = 0; + +	get_online_cpus(); +	rd = power_zone_to_rapl_domain(power_zone); +	switch (rd->rpl[id].prim_id) { +	case PL1_ENABLE: +		prim = THERMAL_SPEC_POWER; +		break; +	case PL2_ENABLE: +		prim = MAX_POWER; +		break; +	default: +		put_online_cpus(); +		return -EINVAL; +	} +	if (rapl_read_data_raw(rd, prim, true, &val)) +		ret = -EIO; +	else +		*data = val; + +	put_online_cpus(); + +	return ret; +} + +static struct powercap_zone_constraint_ops constraint_ops = { +	.set_power_limit_uw = set_power_limit, +	.get_power_limit_uw = get_current_power_limit, +	.set_time_window_us = set_time_window, +	.get_time_window_us = get_time_window, +	.get_max_power_uw = get_max_power, +	.get_name = get_constraint_name, +}; + +/* called after domain detection and package level data are set */ +static void rapl_init_domains(struct rapl_package *rp) +{ +	int i; +	struct rapl_domain *rd = rp->domains; + +	for (i = 0; i < RAPL_DOMAIN_MAX; i++) { +		unsigned int mask = rp->domain_map & (1 << i); +		switch (mask) { +		case BIT(RAPL_DOMAIN_PACKAGE): +			rd->name = rapl_domain_names[RAPL_DOMAIN_PACKAGE]; +			rd->id = RAPL_DOMAIN_PACKAGE; +			rd->msrs[0] = MSR_PKG_POWER_LIMIT; +			rd->msrs[1] = MSR_PKG_ENERGY_STATUS; +			rd->msrs[2] = MSR_PKG_PERF_STATUS; +			rd->msrs[3] = 0; +			rd->msrs[4] = MSR_PKG_POWER_INFO; +			rd->rpl[0].prim_id = PL1_ENABLE; +			rd->rpl[0].name = pl1_name; +			rd->rpl[1].prim_id = PL2_ENABLE; +			rd->rpl[1].name = pl2_name; +			break; +		case 
BIT(RAPL_DOMAIN_PP0): +			rd->name = rapl_domain_names[RAPL_DOMAIN_PP0]; +			rd->id = RAPL_DOMAIN_PP0; +			rd->msrs[0] = MSR_PP0_POWER_LIMIT; +			rd->msrs[1] = MSR_PP0_ENERGY_STATUS; +			rd->msrs[2] = 0; +			rd->msrs[3] = MSR_PP0_POLICY; +			rd->msrs[4] = 0; +			rd->rpl[0].prim_id = PL1_ENABLE; +			rd->rpl[0].name = pl1_name; +			break; +		case BIT(RAPL_DOMAIN_PP1): +			rd->name = rapl_domain_names[RAPL_DOMAIN_PP1]; +			rd->id = RAPL_DOMAIN_PP1; +			rd->msrs[0] = MSR_PP1_POWER_LIMIT; +			rd->msrs[1] = MSR_PP1_ENERGY_STATUS; +			rd->msrs[2] = 0; +			rd->msrs[3] = MSR_PP1_POLICY; +			rd->msrs[4] = 0; +			rd->rpl[0].prim_id = PL1_ENABLE; +			rd->rpl[0].name = pl1_name; +			break; +		case BIT(RAPL_DOMAIN_DRAM): +			rd->name = rapl_domain_names[RAPL_DOMAIN_DRAM]; +			rd->id = RAPL_DOMAIN_DRAM; +			rd->msrs[0] = MSR_DRAM_POWER_LIMIT; +			rd->msrs[1] = MSR_DRAM_ENERGY_STATUS; +			rd->msrs[2] = MSR_DRAM_PERF_STATUS; +			rd->msrs[3] = 0; +			rd->msrs[4] = MSR_DRAM_POWER_INFO; +			rd->rpl[0].prim_id = PL1_ENABLE; +			rd->rpl[0].name = pl1_name; +			break; +		} +		if (mask) { +			rd->package_id = rp->id; +			rd++; +		} +	} +} + +static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, +			int to_raw) +{ +	u64 divisor = 1; +	int scale = 1; /* scale to user friendly data without floating point */ +	u64 f, y; /* fraction and exp. used for time unit */ +	struct rapl_package *rp; + +	rp = find_package_by_id(package); +	if (!rp) +		return value; + +	switch (type) { +	case POWER_UNIT: +		divisor = rp->power_unit_divisor; +		scale = POWER_UNIT_SCALE; +		break; +	case ENERGY_UNIT: +		scale = ENERGY_UNIT_SCALE; +		divisor = rp->energy_unit_divisor; +		break; +	case TIME_UNIT: +		divisor = rp->time_unit_divisor; +		scale = TIME_UNIT_SCALE; +		/* special processing based on 2^Y*(1+F)/4 = val/divisor, refer +		 * to Intel Software Developer's manual Vol. 3a, CH 14.7.4. 
+		 */ +		if (!to_raw) { +			f = (value & 0x60) >> 5; +			y = value & 0x1f; +			value = (1 << y) * (4 + f) * scale / 4; +			return div64_u64(value, divisor); +		} else { +			do_div(value, scale); +			value *= divisor; +			y = ilog2(value); +			f = div64_u64(4 * (value - (1 << y)), 1 << y); +			value = (y & 0x1f) | ((f & 0x3) << 5); +			return value; +		} +		break; +	case ARBITRARY_UNIT: +	default: +		return value; +	}; + +	if (to_raw) +		return div64_u64(value * divisor, scale); +	else +		return div64_u64(value * scale, divisor); +} + +/* in the order of enum rapl_primitives */ +static struct rapl_primitive_info rpi[] = { +	/* name, mask, shift, msr index, unit divisor */ +	PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, +				RAPL_DOMAIN_MSR_STATUS, ENERGY_UNIT, 0), +	PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, +				RAPL_DOMAIN_MSR_LIMIT, POWER_UNIT, 0), +	PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, +				RAPL_DOMAIN_MSR_LIMIT, POWER_UNIT, 0), +	PRIMITIVE_INFO_INIT(FW_LOCK, POWER_PP_LOCK, 31, +				RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), +	PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, +				RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), +	PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, +				RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), +	PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, +				RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), +	PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, +				RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), +	PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, +				RAPL_DOMAIN_MSR_LIMIT, TIME_UNIT, 0), +	PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, +				RAPL_DOMAIN_MSR_LIMIT, TIME_UNIT, 0), +	PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, +				0, RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), +	PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, +				RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), +	PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, +			
	RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), +	PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, +				RAPL_DOMAIN_MSR_INFO, TIME_UNIT, 0), +	PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, +				RAPL_DOMAIN_MSR_PERF, TIME_UNIT, 0), +	PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, +				RAPL_DOMAIN_MSR_POLICY, ARBITRARY_UNIT, 0), +	/* non-hardware */ +	PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, +				RAPL_PRIMITIVE_DERIVED), +	{NULL, 0, 0, 0}, +}; + +/* Read primitive data based on its related struct rapl_primitive_info. + * if xlate flag is set, return translated data based on data units, i.e. + * time, energy, and power. + * RAPL MSRs are non-architectual and are laid out not consistently across + * domains. Here we use primitive info to allow writing consolidated access + * functions. + * For a given primitive, it is processed by MSR mask and shift. Unit conversion + * is pre-assigned based on RAPL unit MSRs read at init time. + * 63-------------------------- 31--------------------------- 0 + * |                           xxxxx (mask)                   | + * |                                |<- shift ----------------| + * 63-------------------------- 31--------------------------- 0 + */ +static int rapl_read_data_raw(struct rapl_domain *rd, +			enum rapl_primitives prim, +			bool xlate, u64 *data) +{ +	u64 value, final; +	u32 msr; +	struct rapl_primitive_info *rp = &rpi[prim]; +	int cpu; + +	if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY) +		return -EINVAL; + +	msr = rd->msrs[rp->id]; +	if (!msr) +		return -EINVAL; +	/* use physical package id to look up active cpus */ +	cpu = find_active_cpu_on_package(rd->package_id); +	if (cpu < 0) +		return cpu; + +	/* special-case package domain, which uses a different bit*/ +	if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) { +		rp->mask = POWER_PACKAGE_LOCK; +		rp->shift = 63; +	} +	/* non-hardware data are collected by the polling thread */ +	if (rp->flag & 
RAPL_PRIMITIVE_DERIVED) { +		*data = rd->rdd.primitives[prim]; +		return 0; +	} + +	if (rdmsrl_safe_on_cpu(cpu, msr, &value)) { +		pr_debug("failed to read msr 0x%x on cpu %d\n", msr, cpu); +		return -EIO; +	} + +	final = value & rp->mask; +	final = final >> rp->shift; +	if (xlate) +		*data = rapl_unit_xlate(rd->package_id, rp->unit, final, 0); +	else +		*data = final; + +	return 0; +} + +/* Similar use of primitive info in the read counterpart */ +static int rapl_write_data_raw(struct rapl_domain *rd, +			enum rapl_primitives prim, +			unsigned long long value) +{ +	u64 msr_val; +	u32 msr; +	struct rapl_primitive_info *rp = &rpi[prim]; +	int cpu; + +	cpu = find_active_cpu_on_package(rd->package_id); +	if (cpu < 0) +		return cpu; +	msr = rd->msrs[rp->id]; +	if (rdmsrl_safe_on_cpu(cpu, msr, &msr_val)) { +		dev_dbg(&rd->power_zone.dev, +			"failed to read msr 0x%x on cpu %d\n", msr, cpu); +		return -EIO; +	} +	value = rapl_unit_xlate(rd->package_id, rp->unit, value, 1); +	msr_val &= ~rp->mask; +	msr_val |= value << rp->shift; +	if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) { +		dev_dbg(&rd->power_zone.dev, +			"failed to write msr 0x%x on cpu %d\n", msr, cpu); +		return -EIO; +	} + +	return 0; +} + +static const struct x86_cpu_id energy_unit_quirk_ids[] = { +	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ +	{} +}; + +static int rapl_check_unit(struct rapl_package *rp, int cpu) +{ +	u64 msr_val; +	u32 value; + +	if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { +		pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", +			MSR_RAPL_POWER_UNIT, cpu); +		return -ENODEV; +	} + +	/* Raw RAPL data stored in MSRs are in certain scales. We need to +	 * convert them into standard units based on the divisors reported in +	 * the RAPL unit MSRs. +	 * i.e. 
+	 * energy unit: 1/enery_unit_divisor Joules +	 * power unit: 1/power_unit_divisor Watts +	 * time unit: 1/time_unit_divisor Seconds +	 */ +	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; +	/* some CPUs have different way to calculate energy unit */ +	if (x86_match_cpu(energy_unit_quirk_ids)) +		rp->energy_unit_divisor = 1000000 / (1 << value); +	else +		rp->energy_unit_divisor = 1 << value; + +	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; +	rp->power_unit_divisor = 1 << value; + +	value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; +	rp->time_unit_divisor = 1 << value; + +	pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", +		rp->id, +		rp->energy_unit_divisor, +		rp->time_unit_divisor, +		rp->power_unit_divisor); + +	return 0; +} + +/* REVISIT: + * When package power limit is set artificially low by RAPL, LVT + * thermal interrupt for package power limit should be ignored + * since we are not really exceeding the real limit. The intention + * is to avoid excessive interrupts while we are trying to save power. + * A useful feature might be routing the package_power_limit interrupt + * to userspace via eventfd. once we have a usecase, this is simple + * to do by adding an atomic notifier. 
+ */ + +static void package_power_limit_irq_save(int package_id) +{ +	u32 l, h = 0; +	int cpu; +	struct rapl_package *rp; + +	rp = find_package_by_id(package_id); +	if (!rp) +		return; + +	if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) +		return; + +	cpu = find_active_cpu_on_package(package_id); +	if (cpu < 0) +		return; +	/* save the state of PLN irq mask bit before disabling it */ +	rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); +	if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { +		rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; +		rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; +	} +	l &= ~PACKAGE_THERM_INT_PLN_ENABLE; +	wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +} + +/* restore per package power limit interrupt enable state */ +static void package_power_limit_irq_restore(int package_id) +{ +	u32 l, h; +	int cpu; +	struct rapl_package *rp; + +	rp = find_package_by_id(package_id); +	if (!rp) +		return; + +	if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) +		return; + +	cpu = find_active_cpu_on_package(package_id); +	if (cpu < 0) +		return; + +	/* irq enable state not saved, nothing to restore */ +	if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) +		return; +	rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); + +	if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) +		l |= PACKAGE_THERM_INT_PLN_ENABLE; +	else +		l &= ~PACKAGE_THERM_INT_PLN_ENABLE; + +	wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +} + +static const struct x86_cpu_id rapl_ids[] = { +	{ X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */ +	{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */ +	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ +	{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */ +	{ X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */ +	{ X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */ +	{ X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */ +	/* TODO: Add more CPU IDs after testing */ +	{} +}; 
+MODULE_DEVICE_TABLE(x86cpu, rapl_ids); + +/* read once for all raw primitive data for all packages, domains */ +static void rapl_update_domain_data(void) +{ +	int dmn, prim; +	u64 val; +	struct rapl_package *rp; + +	list_for_each_entry(rp, &rapl_packages, plist) { +		for (dmn = 0; dmn < rp->nr_domains; dmn++) { +			pr_debug("update package %d domain %s data\n", rp->id, +				rp->domains[dmn].name); +			/* exclude non-raw primitives */ +			for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) +				if (!rapl_read_data_raw(&rp->domains[dmn], prim, +								rpi[prim].unit, +								&val)) +					rp->domains[dmn].rdd.primitives[prim] = +									val; +		} +	} + +} + +static int rapl_unregister_powercap(void) +{ +	struct rapl_package *rp; +	struct rapl_domain *rd, *rd_package = NULL; + +	/* unregister all active rapl packages from the powercap layer, +	 * hotplug lock held +	 */ +	list_for_each_entry(rp, &rapl_packages, plist) { +		package_power_limit_irq_restore(rp->id); + +		for (rd = rp->domains; rd < rp->domains + rp->nr_domains; +		     rd++) { +			pr_debug("remove package, undo power limit on %d: %s\n", +				rp->id, rd->name); +			rapl_write_data_raw(rd, PL1_ENABLE, 0); +			rapl_write_data_raw(rd, PL2_ENABLE, 0); +			rapl_write_data_raw(rd, PL1_CLAMP, 0); +			rapl_write_data_raw(rd, PL2_CLAMP, 0); +			if (rd->id == RAPL_DOMAIN_PACKAGE) { +				rd_package = rd; +				continue; +			} +			powercap_unregister_zone(control_type, &rd->power_zone); +		} +		/* do the package zone last */ +		if (rd_package) +			powercap_unregister_zone(control_type, +						&rd_package->power_zone); +	} +	powercap_unregister_control_type(control_type); + +	return 0; +} + +static int rapl_package_register_powercap(struct rapl_package *rp) +{ +	struct rapl_domain *rd; +	int ret = 0; +	char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/ +	struct powercap_zone *power_zone = NULL; +	int nr_pl; + +	/* first we register package domain as the parent zone*/ +	for (rd = rp->domains; rd < 
rp->domains + rp->nr_domains; rd++) { +		if (rd->id == RAPL_DOMAIN_PACKAGE) { +			nr_pl = find_nr_power_limit(rd); +			pr_debug("register socket %d package domain %s\n", +				rp->id, rd->name); +			memset(dev_name, 0, sizeof(dev_name)); +			snprintf(dev_name, sizeof(dev_name), "%s-%d", +				rd->name, rp->id); +			power_zone = powercap_register_zone(&rd->power_zone, +							control_type, +							dev_name, NULL, +							&zone_ops[rd->id], +							nr_pl, +							&constraint_ops); +			if (IS_ERR(power_zone)) { +				pr_debug("failed to register package, %d\n", +					rp->id); +				ret = PTR_ERR(power_zone); +				goto exit_package; +			} +			/* track parent zone in per package/socket data */ +			rp->power_zone = power_zone; +			/* done, only one package domain per socket */ +			break; +		} +	} +	if (!power_zone) { +		pr_err("no package domain found, unknown topology!\n"); +		ret = -ENODEV; +		goto exit_package; +	} +	/* now register domains as children of the socket/package*/ +	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { +		if (rd->id == RAPL_DOMAIN_PACKAGE) +			continue; +		/* number of power limits per domain varies */ +		nr_pl = find_nr_power_limit(rd); +		power_zone = powercap_register_zone(&rd->power_zone, +						control_type, rd->name, +						rp->power_zone, +						&zone_ops[rd->id], nr_pl, +						&constraint_ops); + +		if (IS_ERR(power_zone)) { +			pr_debug("failed to register power_zone, %d:%s:%s\n", +				rp->id, rd->name, dev_name); +			ret = PTR_ERR(power_zone); +			goto err_cleanup; +		} +	} + +exit_package: +	return ret; +err_cleanup: +	/* clean up previously initialized domains within the package if we +	 * failed after the first domain setup. 
+	 */ +	while (--rd >= rp->domains) { +		pr_debug("unregister package %d domain %s\n", rp->id, rd->name); +		powercap_unregister_zone(control_type, &rd->power_zone); +	} + +	return ret; +} + +static int rapl_register_powercap(void) +{ +	struct rapl_domain *rd; +	struct rapl_package *rp; +	int ret = 0; + +	control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); +	if (IS_ERR(control_type)) { +		pr_debug("failed to register powercap control_type.\n"); +		return PTR_ERR(control_type); +	} +	/* read the initial data */ +	rapl_update_domain_data(); +	list_for_each_entry(rp, &rapl_packages, plist) +		if (rapl_package_register_powercap(rp)) +			goto err_cleanup_package; +	return ret; + +err_cleanup_package: +	/* clean up previously initialized packages */ +	list_for_each_entry_continue_reverse(rp, &rapl_packages, plist) { +		for (rd = rp->domains; rd < rp->domains + rp->nr_domains; +		     rd++) { +			pr_debug("unregister zone/package %d, %s domain\n", +				rp->id, rd->name); +			powercap_unregister_zone(control_type, &rd->power_zone); +		} +	} + +	return ret; +} + +static int rapl_check_domain(int cpu, int domain) +{ +	unsigned msr; +	u64 val = 0; + +	switch (domain) { +	case RAPL_DOMAIN_PACKAGE: +		msr = MSR_PKG_ENERGY_STATUS; +		break; +	case RAPL_DOMAIN_PP0: +		msr = MSR_PP0_ENERGY_STATUS; +		break; +	case RAPL_DOMAIN_PP1: +		msr = MSR_PP1_ENERGY_STATUS; +		break; +	case RAPL_DOMAIN_DRAM: +		msr = MSR_DRAM_ENERGY_STATUS; +		break; +	default: +		pr_err("invalid domain id %d\n", domain); +		return -EINVAL; +	} +	/* make sure domain counters are available and contains non-zero +	 * values, otherwise skip it. +	 */ +	if (rdmsrl_safe_on_cpu(cpu, msr, &val) || !val) +		return -ENODEV; + +	return 0; +} + +/* Detect active and valid domains for the given CPU, caller must + * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. 
+ */ +static int rapl_detect_domains(struct rapl_package *rp, int cpu) +{ +	int i; +	int ret = 0; +	struct rapl_domain *rd; +	u64 locked; + +	for (i = 0; i < RAPL_DOMAIN_MAX; i++) { +		/* use physical package id to read counters */ +		if (!rapl_check_domain(cpu, i)) +			rp->domain_map |= 1 << i; +		else +			pr_warn("RAPL domain %s detection failed\n", +				rapl_domain_names[i]); +	} +	rp->nr_domains = bitmap_weight(&rp->domain_map,	RAPL_DOMAIN_MAX); +	if (!rp->nr_domains) { +		pr_err("no valid rapl domains found in package %d\n", rp->id); +		ret = -ENODEV; +		goto done; +	} +	pr_debug("found %d domains on package %d\n", rp->nr_domains, rp->id); + +	rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain), +			GFP_KERNEL); +	if (!rp->domains) { +		ret = -ENOMEM; +		goto done; +	} +	rapl_init_domains(rp); + +	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { +		/* check if the domain is locked by BIOS */ +		if (rapl_read_data_raw(rd, FW_LOCK, false, &locked)) { +			pr_info("RAPL package %d domain %s locked by BIOS\n", +				rp->id, rd->name); +				rd->state |= DOMAIN_STATE_BIOS_LOCKED; +		} +	} + + +done: +	return ret; +} + +static bool is_package_new(int package) +{ +	struct rapl_package *rp; + +	/* caller prevents cpu hotplug, there will be no new packages added +	 * or deleted while traversing the package list, no need for locking. +	 */ +	list_for_each_entry(rp, &rapl_packages, plist) +		if (package == rp->id) +			return false; + +	return true; +} + +/* RAPL interface can be made of a two-level hierarchy: package level and domain + * level. We first detect the number of packages then domains of each package. + * We have to consider the possiblity of CPU online/offline due to hotplug and + * other scenarios. 
+ */ +static int rapl_detect_topology(void) +{ +	int i; +	int phy_package_id; +	struct rapl_package *new_package, *rp; + +	for_each_online_cpu(i) { +		phy_package_id = topology_physical_package_id(i); +		if (is_package_new(phy_package_id)) { +			new_package = kzalloc(sizeof(*rp), GFP_KERNEL); +			if (!new_package) { +				rapl_cleanup_data(); +				return -ENOMEM; +			} +			/* add the new package to the list */ +			new_package->id = phy_package_id; +			new_package->nr_cpus = 1; + +			/* check if the package contains valid domains */ +			if (rapl_detect_domains(new_package, i) || +				rapl_check_unit(new_package, i)) { +				kfree(new_package->domains); +				kfree(new_package); +				/* free up the packages already initialized */ +				rapl_cleanup_data(); +				return -ENODEV; +			} +			INIT_LIST_HEAD(&new_package->plist); +			list_add(&new_package->plist, &rapl_packages); +		} else { +			rp = find_package_by_id(phy_package_id); +			if (rp) +				++rp->nr_cpus; +		} +	} + +	return 0; +} + +/* called from CPU hotplug notifier, hotplug lock held */ +static void rapl_remove_package(struct rapl_package *rp) +{ +	struct rapl_domain *rd, *rd_package = NULL; + +	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { +		if (rd->id == RAPL_DOMAIN_PACKAGE) { +			rd_package = rd; +			continue; +		} +		pr_debug("remove package %d, %s domain\n", rp->id, rd->name); +		powercap_unregister_zone(control_type, &rd->power_zone); +	} +	/* do parent zone last */ +	powercap_unregister_zone(control_type, &rd_package->power_zone); +	list_del(&rp->plist); +	kfree(rp); +} + +/* called from CPU hotplug notifier, hotplug lock held */ +static int rapl_add_package(int cpu) +{ +	int ret = 0; +	int phy_package_id; +	struct rapl_package *rp; + +	phy_package_id = topology_physical_package_id(cpu); +	rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); +	if (!rp) +		return -ENOMEM; + +	/* add the new package to the list */ +	rp->id = phy_package_id; +	rp->nr_cpus = 1; +	/* check if the package 
contains valid domains */ +	if (rapl_detect_domains(rp, cpu) || +		rapl_check_unit(rp, cpu)) { +		ret = -ENODEV; +		goto err_free_package; +	} +	if (!rapl_package_register_powercap(rp)) { +		INIT_LIST_HEAD(&rp->plist); +		list_add(&rp->plist, &rapl_packages); +		return ret; +	} + +err_free_package: +	kfree(rp->domains); +	kfree(rp); + +	return ret; +} + +/* Handles CPU hotplug on multi-socket systems. + * If a CPU goes online as the first CPU of the physical package + * we add the RAPL package to the system. Similarly, when the last + * CPU of the package is removed, we remove the RAPL package and its + * associated domains. Cooling devices are handled accordingly at + * per-domain level. + */ +static int rapl_cpu_callback(struct notifier_block *nfb, +				unsigned long action, void *hcpu) +{ +	unsigned long cpu = (unsigned long)hcpu; +	int phy_package_id; +	struct rapl_package *rp; + +	phy_package_id = topology_physical_package_id(cpu); +	switch (action) { +	case CPU_ONLINE: +	case CPU_ONLINE_FROZEN: +	case CPU_DOWN_FAILED: +	case CPU_DOWN_FAILED_FROZEN: +		rp = find_package_by_id(phy_package_id); +		if (rp) +			++rp->nr_cpus; +		else +			rapl_add_package(cpu); +		break; +	case CPU_DOWN_PREPARE: +	case CPU_DOWN_PREPARE_FROZEN: +		rp = find_package_by_id(phy_package_id); +		if (!rp) +			break; +		if (--rp->nr_cpus == 0) +			rapl_remove_package(rp); +	} + +	return NOTIFY_OK; +} + +static struct notifier_block rapl_cpu_notifier = { +	.notifier_call = rapl_cpu_callback, +}; + +static int __init rapl_init(void) +{ +	int ret = 0; + +	if (!x86_match_cpu(rapl_ids)) { +		pr_err("driver does not support CPU family %d model %d\n", +			boot_cpu_data.x86, boot_cpu_data.x86_model); + +		return -ENODEV; +	} + +	cpu_notifier_register_begin(); + +	/* prevent CPU hotplug during detection */ +	get_online_cpus(); +	ret = rapl_detect_topology(); +	if (ret) +		goto done; + +	if (rapl_register_powercap()) { +		rapl_cleanup_data(); +		ret = -ENODEV; +		goto done; +	} +	
__register_hotcpu_notifier(&rapl_cpu_notifier); +done: +	put_online_cpus(); +	cpu_notifier_register_done(); + +	return ret; +} + +static void __exit rapl_exit(void) +{ +	cpu_notifier_register_begin(); +	get_online_cpus(); +	__unregister_hotcpu_notifier(&rapl_cpu_notifier); +	rapl_unregister_powercap(); +	rapl_cleanup_data(); +	put_online_cpus(); +	cpu_notifier_register_done(); +} + +module_init(rapl_init); +module_exit(rapl_exit); + +MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit)"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c new file mode 100644 index 00000000000..84419af16f7 --- /dev/null +++ b/drivers/powercap/powercap_sys.c @@ -0,0 +1,690 @@ +/* + * Power capping class + * Copyright (c) 2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc. 
+ * + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/powercap.h> + +#define to_powercap_zone(n) container_of(n, struct powercap_zone, dev) +#define to_powercap_control_type(n) \ +			container_of(n, struct powercap_control_type, dev) + +/* Power zone show function */ +#define define_power_zone_show(_attr)		\ +static ssize_t _attr##_show(struct device *dev, \ +					struct device_attribute *dev_attr,\ +					char *buf) \ +{ \ +	u64 value; \ +	ssize_t len = -EINVAL; \ +	struct powercap_zone *power_zone = to_powercap_zone(dev); \ +	\ +	if (power_zone->ops->get_##_attr) { \ +		if (!power_zone->ops->get_##_attr(power_zone, &value)) \ +			len = sprintf(buf, "%lld\n", value); \ +	} \ +	\ +	return len; \ +} + +/* The only meaningful input is 0 (reset), others are silently ignored */ +#define define_power_zone_store(_attr)		\ +static ssize_t _attr##_store(struct device *dev,\ +				struct device_attribute *dev_attr, \ +				const char *buf, size_t count) \ +{ \ +	int err; \ +	struct powercap_zone *power_zone = to_powercap_zone(dev); \ +	u64 value; \ +	\ +	err = kstrtoull(buf, 10, &value); \ +	if (err) \ +		return -EINVAL; \ +	if (value) \ +		return count; \ +	if (power_zone->ops->reset_##_attr) { \ +		if (!power_zone->ops->reset_##_attr(power_zone)) \ +			return count; \ +	} \ +	\ +	return -EINVAL; \ +} + +/* Power zone constraint show function */ +#define define_power_zone_constraint_show(_attr) \ +static ssize_t show_constraint_##_attr(struct device *dev, \ +				struct device_attribute *dev_attr,\ +				char *buf) \ +{ \ +	u64 value; \ +	ssize_t len = -ENODATA; \ +	struct powercap_zone *power_zone = to_powercap_zone(dev); \ +	int id; \ +	struct powercap_zone_constraint *pconst;\ +	\ +	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \ +		return -EINVAL; \ +	if (id >= power_zone->const_id_cnt)	\ +		return -EINVAL; \ +	pconst = &power_zone->constraints[id]; \ +	if (pconst && pconst->ops && 
pconst->ops->get_##_attr) { \ +		if (!pconst->ops->get_##_attr(power_zone, id, &value)) \ +			len = sprintf(buf, "%lld\n", value); \ +	} \ +	\ +	return len; \ +} + +/* Power zone constraint store function */ +#define define_power_zone_constraint_store(_attr) \ +static ssize_t store_constraint_##_attr(struct device *dev,\ +				struct device_attribute *dev_attr, \ +				const char *buf, size_t count) \ +{ \ +	int err; \ +	u64 value; \ +	struct powercap_zone *power_zone = to_powercap_zone(dev); \ +	int id; \ +	struct powercap_zone_constraint *pconst;\ +	\ +	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \ +		return -EINVAL; \ +	if (id >= power_zone->const_id_cnt)	\ +		return -EINVAL; \ +	pconst = &power_zone->constraints[id]; \ +	err = kstrtoull(buf, 10, &value); \ +	if (err) \ +		return -EINVAL; \ +	if (pconst && pconst->ops && pconst->ops->set_##_attr) { \ +		if (!pconst->ops->set_##_attr(power_zone, id, value)) \ +			return count; \ +	} \ +	\ +	return -ENODATA; \ +} + +/* Power zone information callbacks */ +define_power_zone_show(power_uw); +define_power_zone_show(max_power_range_uw); +define_power_zone_show(energy_uj); +define_power_zone_store(energy_uj); +define_power_zone_show(max_energy_range_uj); + +/* Power zone attributes */ +static DEVICE_ATTR_RO(max_power_range_uw); +static DEVICE_ATTR_RO(power_uw); +static DEVICE_ATTR_RO(max_energy_range_uj); +static DEVICE_ATTR_RW(energy_uj); + +/* Power zone constraint attributes callbacks */ +define_power_zone_constraint_show(power_limit_uw); +define_power_zone_constraint_store(power_limit_uw); +define_power_zone_constraint_show(time_window_us); +define_power_zone_constraint_store(time_window_us); +define_power_zone_constraint_show(max_power_uw); +define_power_zone_constraint_show(min_power_uw); +define_power_zone_constraint_show(max_time_window_us); +define_power_zone_constraint_show(min_time_window_us); + +/* For one time seeding of constraint device attributes */ +struct powercap_constraint_attr { +	struct 
device_attribute power_limit_attr; +	struct device_attribute time_window_attr; +	struct device_attribute max_power_attr; +	struct device_attribute min_power_attr; +	struct device_attribute max_time_window_attr; +	struct device_attribute min_time_window_attr; +	struct device_attribute name_attr; +}; + +static struct powercap_constraint_attr +				constraint_attrs[MAX_CONSTRAINTS_PER_ZONE]; + +/* A list of powercap control_types */ +static LIST_HEAD(powercap_cntrl_list); +/* Mutex to protect list of powercap control_types */ +static DEFINE_MUTEX(powercap_cntrl_list_lock); + +#define POWERCAP_CONSTRAINT_NAME_LEN	30 /* Some limit to avoid overflow */ +static ssize_t show_constraint_name(struct device *dev, +				struct device_attribute *dev_attr, +				char *buf) +{ +	const char *name; +	struct powercap_zone *power_zone = to_powercap_zone(dev); +	int id; +	ssize_t len = -ENODATA; +	struct powercap_zone_constraint *pconst; + +	if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) +		return -EINVAL; +	if (id >= power_zone->const_id_cnt) +		return -EINVAL; +	pconst = &power_zone->constraints[id]; + +	if (pconst && pconst->ops && pconst->ops->get_name) { +		name = pconst->ops->get_name(power_zone, id); +		if (name) { +			snprintf(buf, POWERCAP_CONSTRAINT_NAME_LEN, +								"%s\n", name); +			buf[POWERCAP_CONSTRAINT_NAME_LEN] = '\0'; +			len = strlen(buf); +		} +	} + +	return len; +} + +static int create_constraint_attribute(int id, const char *name, +				int mode, +				struct device_attribute *dev_attr, +				ssize_t (*show)(struct device *, +					struct device_attribute *, char *), +				ssize_t (*store)(struct device *, +					struct device_attribute *, +				const char *, size_t) +				) +{ + +	dev_attr->attr.name = kasprintf(GFP_KERNEL, "constraint_%d_%s", +								id, name); +	if (!dev_attr->attr.name) +		return -ENOMEM; +	dev_attr->attr.mode = mode; +	dev_attr->show = show; +	dev_attr->store = store; + +	return 0; +} + +static void free_constraint_attributes(void) +{ +	
int i; + +	for (i = 0; i < MAX_CONSTRAINTS_PER_ZONE; ++i) { +		kfree(constraint_attrs[i].power_limit_attr.attr.name); +		kfree(constraint_attrs[i].time_window_attr.attr.name); +		kfree(constraint_attrs[i].name_attr.attr.name); +		kfree(constraint_attrs[i].max_power_attr.attr.name); +		kfree(constraint_attrs[i].min_power_attr.attr.name); +		kfree(constraint_attrs[i].max_time_window_attr.attr.name); +		kfree(constraint_attrs[i].min_time_window_attr.attr.name); +	} +} + +static int seed_constraint_attributes(void) +{ +	int i; +	int ret; + +	for (i = 0; i < MAX_CONSTRAINTS_PER_ZONE; ++i) { +		ret = create_constraint_attribute(i, "power_limit_uw", +					S_IWUSR | S_IRUGO, +					&constraint_attrs[i].power_limit_attr, +					show_constraint_power_limit_uw, +					store_constraint_power_limit_uw); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "time_window_us", +					S_IWUSR | S_IRUGO, +					&constraint_attrs[i].time_window_attr, +					show_constraint_time_window_us, +					store_constraint_time_window_us); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "name", S_IRUGO, +				&constraint_attrs[i].name_attr, +				show_constraint_name, +				NULL); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "max_power_uw", S_IRUGO, +				&constraint_attrs[i].max_power_attr, +				show_constraint_max_power_uw, +				NULL); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "min_power_uw", S_IRUGO, +				&constraint_attrs[i].min_power_attr, +				show_constraint_min_power_uw, +				NULL); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "max_time_window_us", +				S_IRUGO, +				&constraint_attrs[i].max_time_window_attr, +				show_constraint_max_time_window_us, +				NULL); +		if (ret) +			goto err_alloc; +		ret = create_constraint_attribute(i, "min_time_window_us", +				S_IRUGO, +				&constraint_attrs[i].min_time_window_attr, +				show_constraint_min_time_window_us, +				NULL); +		if 
(ret) +			goto err_alloc; + +	} + +	return 0; + +err_alloc: +	free_constraint_attributes(); + +	return ret; +} + +static int create_constraints(struct powercap_zone *power_zone, +				int nr_constraints, +				struct powercap_zone_constraint_ops *const_ops) +{ +	int i; +	int ret = 0; +	int count; +	struct powercap_zone_constraint *pconst; + +	if (!power_zone || !const_ops || !const_ops->get_power_limit_uw || +					!const_ops->set_power_limit_uw || +					!const_ops->get_time_window_us || +					!const_ops->set_time_window_us) +		return -EINVAL; + +	count = power_zone->zone_attr_count; +	for (i = 0; i < nr_constraints; ++i) { +		pconst = &power_zone->constraints[i]; +		pconst->ops = const_ops; +		pconst->id = power_zone->const_id_cnt; +		power_zone->const_id_cnt++; +		power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].power_limit_attr.attr; +		power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].time_window_attr.attr; +		if (pconst->ops->get_name) +			power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].name_attr.attr; +		if (pconst->ops->get_max_power_uw) +			power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].max_power_attr.attr; +		if (pconst->ops->get_min_power_uw) +			power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].min_power_attr.attr; +		if (pconst->ops->get_max_time_window_us) +			power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].max_time_window_attr.attr; +		if (pconst->ops->get_min_time_window_us) +			power_zone->zone_dev_attrs[count++] = +				&constraint_attrs[i].min_time_window_attr.attr; +	} +	power_zone->zone_attr_count = count; + +	return ret; +} + +static bool control_type_valid(void *control_type) +{ +	struct powercap_control_type *pos = NULL; +	bool found = false; + +	mutex_lock(&powercap_cntrl_list_lock); + +	list_for_each_entry(pos, &powercap_cntrl_list, node) { +		if (pos == control_type) { +			found = true; +			break; +		} +	} +	mutex_unlock(&powercap_cntrl_list_lock); + +	
return found; +} + +static ssize_t name_show(struct device *dev, +				struct device_attribute *attr, +				char *buf) +{ +	struct powercap_zone *power_zone = to_powercap_zone(dev); + +	return sprintf(buf, "%s\n", power_zone->name); +} + +static DEVICE_ATTR_RO(name); + +/* Create zone and attributes in sysfs */ +static void create_power_zone_common_attributes( +					struct powercap_zone *power_zone) +{ +	int count = 0; + +	power_zone->zone_dev_attrs[count++] = &dev_attr_name.attr; +	if (power_zone->ops->get_max_energy_range_uj) +		power_zone->zone_dev_attrs[count++] = +					&dev_attr_max_energy_range_uj.attr; +	if (power_zone->ops->get_energy_uj) { +		if (power_zone->ops->reset_energy_uj) +			dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO; +		else +			dev_attr_energy_uj.attr.mode = S_IRUGO; +		power_zone->zone_dev_attrs[count++] = +					&dev_attr_energy_uj.attr; +	} +	if (power_zone->ops->get_power_uw) +		power_zone->zone_dev_attrs[count++] = +					&dev_attr_power_uw.attr; +	if (power_zone->ops->get_max_power_range_uw) +		power_zone->zone_dev_attrs[count++] = +					&dev_attr_max_power_range_uw.attr; +	power_zone->zone_dev_attrs[count] = NULL; +	power_zone->zone_attr_count = count; +} + +static void powercap_release(struct device *dev) +{ +	bool allocated; + +	if (dev->parent) { +		struct powercap_zone *power_zone = to_powercap_zone(dev); + +		/* Store flag as the release() may free memory */ +		allocated = power_zone->allocated; +		/* Remove id from parent idr struct */ +		idr_remove(power_zone->parent_idr, power_zone->id); +		/* Destroy idrs allocated for this zone */ +		idr_destroy(&power_zone->idr); +		kfree(power_zone->name); +		kfree(power_zone->zone_dev_attrs); +		kfree(power_zone->constraints); +		if (power_zone->ops->release) +			power_zone->ops->release(power_zone); +		if (allocated) +			kfree(power_zone); +	} else { +		struct powercap_control_type *control_type = +						to_powercap_control_type(dev); + +		/* Store flag as the release() may free memory */ 
+		allocated = control_type->allocated; +		idr_destroy(&control_type->idr); +		mutex_destroy(&control_type->lock); +		if (control_type->ops && control_type->ops->release) +			control_type->ops->release(control_type); +		if (allocated) +			kfree(control_type); +	} +} + +static ssize_t enabled_show(struct device *dev, +				struct device_attribute *attr, +				char *buf) +{ +	bool mode = true; + +	/* Default is enabled */ +	if (dev->parent) { +		struct powercap_zone *power_zone = to_powercap_zone(dev); +		if (power_zone->ops->get_enable) +			if (power_zone->ops->get_enable(power_zone, &mode)) +				mode = false; +	} else { +		struct powercap_control_type *control_type = +						to_powercap_control_type(dev); +		if (control_type->ops && control_type->ops->get_enable) +			if (control_type->ops->get_enable(control_type, &mode)) +				mode = false; +	} + +	return sprintf(buf, "%d\n", mode); +} + +static ssize_t enabled_store(struct device *dev, +				struct device_attribute *attr, +				const char *buf,  size_t len) +{ +	bool mode; + +	if (strtobool(buf, &mode)) +		return -EINVAL; +	if (dev->parent) { +		struct powercap_zone *power_zone = to_powercap_zone(dev); +		if (power_zone->ops->set_enable) +			if (!power_zone->ops->set_enable(power_zone, mode)) +				return len; +	} else { +		struct powercap_control_type *control_type = +						to_powercap_control_type(dev); +		if (control_type->ops && control_type->ops->set_enable) +			if (!control_type->ops->set_enable(control_type, mode)) +				return len; +	} + +	return -ENOSYS; +} + +static DEVICE_ATTR_RW(enabled); + +static struct attribute *powercap_attrs[] = { +	&dev_attr_enabled.attr, +	NULL, +}; +ATTRIBUTE_GROUPS(powercap); + +static struct class powercap_class = { +	.name = "powercap", +	.dev_release = powercap_release, +	.dev_groups = powercap_groups, +}; + +struct powercap_zone *powercap_register_zone( +				struct powercap_zone *power_zone, +				struct powercap_control_type *control_type, +				const char *name, +				struct 
powercap_zone *parent, +				const struct powercap_zone_ops *ops, +				int nr_constraints, +				struct powercap_zone_constraint_ops *const_ops) +{ +	int result; +	int nr_attrs; + +	if (!name || !control_type || !ops || +			nr_constraints > MAX_CONSTRAINTS_PER_ZONE || +			(!ops->get_energy_uj && !ops->get_power_uw) || +			!control_type_valid(control_type)) +		return ERR_PTR(-EINVAL); + +	if (power_zone) { +		if (!ops->release) +			return ERR_PTR(-EINVAL); +		memset(power_zone, 0, sizeof(*power_zone)); +	} else { +		power_zone = kzalloc(sizeof(*power_zone), GFP_KERNEL); +		if (!power_zone) +			return ERR_PTR(-ENOMEM); +		power_zone->allocated = true; +	} +	power_zone->ops = ops; +	power_zone->control_type_inst = control_type; +	if (!parent) { +		power_zone->dev.parent = &control_type->dev; +		power_zone->parent_idr = &control_type->idr; +	} else { +		power_zone->dev.parent = &parent->dev; +		power_zone->parent_idr = &parent->idr; +	} +	power_zone->dev.class = &powercap_class; + +	mutex_lock(&control_type->lock); +	/* Using idr to get the unique id */ +	result = idr_alloc(power_zone->parent_idr, NULL, 0, 0, GFP_KERNEL); +	if (result < 0) +		goto err_idr_alloc; + +	power_zone->id = result; +	idr_init(&power_zone->idr); +	power_zone->name = kstrdup(name, GFP_KERNEL); +	if (!power_zone->name) +		goto err_name_alloc; +	dev_set_name(&power_zone->dev, "%s:%x", +					dev_name(power_zone->dev.parent), +					power_zone->id); +	power_zone->constraints = kzalloc(sizeof(*power_zone->constraints) * +					 nr_constraints, GFP_KERNEL); +	if (!power_zone->constraints) +		goto err_const_alloc; + +	nr_attrs = nr_constraints * POWERCAP_CONSTRAINTS_ATTRS + +						POWERCAP_ZONE_MAX_ATTRS + 1; +	power_zone->zone_dev_attrs = kzalloc(sizeof(void *) * +						nr_attrs, GFP_KERNEL); +	if (!power_zone->zone_dev_attrs) +		goto err_attr_alloc; +	create_power_zone_common_attributes(power_zone); +	result = create_constraints(power_zone, nr_constraints, const_ops); +	if (result) +		goto err_dev_ret; + 
+	power_zone->zone_dev_attrs[power_zone->zone_attr_count] = NULL; +	power_zone->dev_zone_attr_group.attrs = power_zone->zone_dev_attrs; +	power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group; +	power_zone->dev_attr_groups[1] = NULL; +	power_zone->dev.groups = power_zone->dev_attr_groups; +	result = device_register(&power_zone->dev); +	if (result) +		goto err_dev_ret; + +	control_type->nr_zones++; +	mutex_unlock(&control_type->lock); + +	return power_zone; + +err_dev_ret: +	kfree(power_zone->zone_dev_attrs); +err_attr_alloc: +	kfree(power_zone->constraints); +err_const_alloc: +	kfree(power_zone->name); +err_name_alloc: +	idr_remove(power_zone->parent_idr, power_zone->id); +err_idr_alloc: +	if (power_zone->allocated) +		kfree(power_zone); +	mutex_unlock(&control_type->lock); + +	return ERR_PTR(result); +} +EXPORT_SYMBOL_GPL(powercap_register_zone); + +int powercap_unregister_zone(struct powercap_control_type *control_type, +				struct powercap_zone *power_zone) +{ +	if (!power_zone || !control_type) +		return -EINVAL; + +	mutex_lock(&control_type->lock); +	control_type->nr_zones--; +	mutex_unlock(&control_type->lock); + +	device_unregister(&power_zone->dev); + +	return 0; +} +EXPORT_SYMBOL_GPL(powercap_unregister_zone); + +struct powercap_control_type *powercap_register_control_type( +				struct powercap_control_type *control_type, +				const char *name, +				const struct powercap_control_type_ops *ops) +{ +	int result; + +	if (!name) +		return ERR_PTR(-EINVAL); +	if (control_type) { +		if (!ops || !ops->release) +			return ERR_PTR(-EINVAL); +		memset(control_type, 0, sizeof(*control_type)); +	} else { +		control_type = kzalloc(sizeof(*control_type), GFP_KERNEL); +		if (!control_type) +			return ERR_PTR(-ENOMEM); +		control_type->allocated = true; +	} +	mutex_init(&control_type->lock); +	control_type->ops = ops; +	INIT_LIST_HEAD(&control_type->node); +	control_type->dev.class = &powercap_class; +	dev_set_name(&control_type->dev, "%s", name); +	result = 
device_register(&control_type->dev); +	if (result) { +		if (control_type->allocated) +			kfree(control_type); +		return ERR_PTR(result); +	} +	idr_init(&control_type->idr); + +	mutex_lock(&powercap_cntrl_list_lock); +	list_add_tail(&control_type->node, &powercap_cntrl_list); +	mutex_unlock(&powercap_cntrl_list_lock); + +	return control_type; +} +EXPORT_SYMBOL_GPL(powercap_register_control_type); + +int powercap_unregister_control_type(struct powercap_control_type *control_type) +{ +	struct powercap_control_type *pos = NULL; + +	if (control_type->nr_zones) { +		dev_err(&control_type->dev, "Zones of this type still not freed\n"); +		return -EINVAL; +	} +	mutex_lock(&powercap_cntrl_list_lock); +	list_for_each_entry(pos, &powercap_cntrl_list, node) { +		if (pos == control_type) { +			list_del(&control_type->node); +			mutex_unlock(&powercap_cntrl_list_lock); +			device_unregister(&control_type->dev); +			return 0; +		} +	} +	mutex_unlock(&powercap_cntrl_list_lock); + +	return -ENODEV; +} +EXPORT_SYMBOL_GPL(powercap_unregister_control_type); + +static int __init powercap_init(void) +{ +	int result = 0; + +	result = seed_constraint_attributes(); +	if (result) +		return result; + +	result = class_register(&powercap_class); + +	return result; +} + +device_initcall(powercap_init); + +MODULE_DESCRIPTION("PowerCap sysfs Driver"); +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); +MODULE_LICENSE("GPL v2");  | 
