diff options
Diffstat (limited to 'arch/x86/pci')
| -rw-r--r-- | arch/x86/pci/Makefile | 10 | ||||
| -rw-r--r-- | arch/x86/pci/acpi.c | 470 | ||||
| -rw-r--r-- | arch/x86/pci/amd_bus.c | 248 | ||||
| -rw-r--r-- | arch/x86/pci/broadcom_bus.c | 75 | ||||
| -rw-r--r-- | arch/x86/pci/bus_numa.c | 119 | ||||
| -rw-r--r-- | arch/x86/pci/bus_numa.h | 21 | ||||
| -rw-r--r-- | arch/x86/pci/ce4100.c | 331 | ||||
| -rw-r--r-- | arch/x86/pci/common.c | 264 | ||||
| -rw-r--r-- | arch/x86/pci/direct.c | 29 | ||||
| -rw-r--r-- | arch/x86/pci/fixup.c | 119 | ||||
| -rw-r--r-- | arch/x86/pci/i386.c | 338 | ||||
| -rw-r--r-- | arch/x86/pci/intel_mid_pci.c (renamed from arch/x86/pci/mrst.c) | 87 | ||||
| -rw-r--r-- | arch/x86/pci/irq.c | 26 | ||||
| -rw-r--r-- | arch/x86/pci/legacy.c | 14 | ||||
| -rw-r--r-- | arch/x86/pci/mmconfig-shared.c | 382 | ||||
| -rw-r--r-- | arch/x86/pci/mmconfig_32.c | 31 | ||||
| -rw-r--r-- | arch/x86/pci/mmconfig_64.c | 52 | ||||
| -rw-r--r-- | arch/x86/pci/numachip.c | 129 | ||||
| -rw-r--r-- | arch/x86/pci/numaq_32.c | 165 | ||||
| -rw-r--r-- | arch/x86/pci/olpc.c | 6 | ||||
| -rw-r--r-- | arch/x86/pci/pcbios.c | 31 | ||||
| -rw-r--r-- | arch/x86/pci/sta2x11-fixup.c | 364 | ||||
| -rw-r--r-- | arch/x86/pci/visws.c | 92 | ||||
| -rw-r--r-- | arch/x86/pci/xen.c | 591 | 
24 files changed, 2737 insertions, 1257 deletions
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index effd96e33f1..5c6fc3577a4 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -7,18 +7,20 @@ obj-$(CONFIG_PCI_OLPC)		+= olpc.o  obj-$(CONFIG_PCI_XEN)		+= xen.o  obj-y				+= fixup.o +obj-$(CONFIG_X86_INTEL_CE)      += ce4100.o  obj-$(CONFIG_ACPI)		+= acpi.o  obj-y				+= legacy.o irq.o -obj-$(CONFIG_X86_VISWS)		+= visws.o +obj-$(CONFIG_STA2X11)           += sta2x11-fixup.o -obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o +obj-$(CONFIG_X86_NUMACHIP)	+= numachip.o -obj-$(CONFIG_X86_MRST)		+= mrst.o +obj-$(CONFIG_X86_INTEL_MID)	+= intel_mid_pci.o  obj-y				+= common.o early.o -obj-y				+= amd_bus.o bus_numa.o +obj-y				+= bus_numa.o +obj-$(CONFIG_AMD_NB)		+= amd_bus.o  obj-$(CONFIG_PCI_CNB20LE_QUIRK)	+= broadcom_bus.o  ifeq ($(CONFIG_PCI_DEBUG),y) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba..5075371ab59 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -9,14 +9,21 @@  struct pci_root_info {  	struct acpi_device *bridge; -	char *name; +	char name[16];  	unsigned int res_num;  	struct resource *res; -	struct pci_bus *bus; -	int busnum; +	resource_size_t *res_offset; +	struct pci_sysdata sd; +#ifdef	CONFIG_PCI_MMCONFIG +	bool mcfg_added; +	u16 segment; +	u8 start_bus; +	u8 end_bus; +#endif  };  static bool pci_use_crs = true; +static bool pci_ignore_seg = false;  static int __init set_use_crs(const struct dmi_system_id *id)  { @@ -24,7 +31,20 @@ static int __init set_use_crs(const struct dmi_system_id *id)  	return 0;  } -static const struct dmi_system_id pci_use_crs_table[] __initconst = { +static int __init set_nouse_crs(const struct dmi_system_id *id) +{ +	pci_use_crs = false; +	return 0; +} + +static int __init set_ignore_seg(const struct dmi_system_id *id) +{ +	printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident); +	pci_ignore_seg = true; +	return 0; +} + +static const struct dmi_system_id pci_crs_quirks[] __initconst = {  	/* 
http://bugzilla.kernel.org/show_bug.cgi?id=14183 */  	{  		.callback = set_use_crs, @@ -43,6 +63,60 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {  			DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),                  },          }, +	/* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */ +	/* 2006 AMD HT/VIA system with two host bridges */ +	{ +		.callback = set_use_crs, +		.ident = "ASUS M2V-MX SE", +		.matches = { +			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), +			DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"), +			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), +		}, +	}, +	/* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */ +	{ +		.callback = set_use_crs, +		.ident = "MSI MS-7253", +		.matches = { +			DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), +			DMI_MATCH(DMI_BOARD_NAME, "MS-7253"), +			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), +		}, +	}, + +	/* Now for the blacklist.. */ + +	/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ +	{ +		.callback = set_nouse_crs, +		.ident = "Dell Studio 1557", +		.matches = { +			DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), +			DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"), +			DMI_MATCH(DMI_BIOS_VERSION, "A09"), +		}, +	}, +	/* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ +	{ +		.callback = set_nouse_crs, +		.ident = "Thinkpad SL510", +		.matches = { +			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), +			DMI_MATCH(DMI_BOARD_NAME, "2847DFG"), +			DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), +		}, +	}, + +	/* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ +	{ +		.callback = set_ignore_seg, +		.ident = "HP xw9300", +		.matches = { +			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), +			DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"), +		}, +	},  	{}  }; @@ -53,7 +127,7 @@ void __init pci_acpi_crs_quirks(void)  	if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008)  		pci_use_crs = false; -	dmi_check_system(pci_use_crs_table); 
+	dmi_check_system(pci_crs_quirks);  	/*  	 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that @@ -70,9 +144,82 @@ void __init pci_acpi_crs_quirks(void)  	       pci_use_crs ? "nocrs" : "use_crs");  } -static acpi_status -resource_to_addr(struct acpi_resource *resource, -			struct acpi_resource_address64 *addr) +#ifdef	CONFIG_PCI_MMCONFIG +static int check_segment(u16 seg, struct device *dev, char *estr) +{ +	if (seg) { +		dev_err(dev, +			"%s can't access PCI configuration " +			"space under this host bridge.\n", +			estr); +		return -EIO; +	} + +	/* +	 * Failure in adding MMCFG information is not fatal, +	 * just can't access extended configuration space of +	 * devices under this host bridge. +	 */ +	dev_warn(dev, +		 "%s can't access extended PCI configuration " +		 "space under this bridge.\n", +		 estr); + +	return 0; +} + +static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, +			  u8 end, phys_addr_t addr) +{ +	int result; +	struct device *dev = &info->bridge->dev; + +	info->start_bus = start; +	info->end_bus = end; +	info->mcfg_added = false; + +	/* return success if MMCFG is not in use */ +	if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) +		return 0; + +	if (!(pci_probe & PCI_PROBE_MMCONF)) +		return check_segment(seg, dev, "MMCONFIG is disabled,"); + +	result = pci_mmconfig_insert(dev, seg, start, end, addr); +	if (result == 0) { +		/* enable MMCFG if it hasn't been enabled yet */ +		if (raw_pci_ext_ops == NULL) +			raw_pci_ext_ops = &pci_mmcfg; +		info->mcfg_added = true; +	} else if (result != -EEXIST) +		return check_segment(seg, dev, +			 "fail to add MMCONFIG information,"); + +	return 0; +} + +static void teardown_mcfg_map(struct pci_root_info *info) +{ +	if (info->mcfg_added) { +		pci_mmconfig_delete(info->segment, info->start_bus, +				    info->end_bus); +		info->mcfg_added = false; +	} +} +#else +static int setup_mcfg_map(struct pci_root_info *info, +				    u16 seg, u8 start, u8 end, +				    
phys_addr_t addr) +{ +	return 0; +} +static void teardown_mcfg_map(struct pci_root_info *info) +{ +} +#endif + +static acpi_status resource_to_addr(struct acpi_resource *resource, +				    struct acpi_resource_address64 *addr)  {  	acpi_status status;  	struct acpi_resource_memory24 *memory24; @@ -117,8 +264,7 @@ resource_to_addr(struct acpi_resource *resource,  	return AE_ERROR;  } -static acpi_status -count_resource(struct acpi_resource *acpi_res, void *data) +static acpi_status count_resource(struct acpi_resource *acpi_res, void *data)  {  	struct pci_root_info *info = data;  	struct acpi_resource_address64 addr; @@ -130,119 +276,210 @@ count_resource(struct acpi_resource *acpi_res, void *data)  	return AE_OK;  } -static acpi_status -setup_resource(struct acpi_resource *acpi_res, void *data) +static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data)  {  	struct pci_root_info *info = data;  	struct resource *res;  	struct acpi_resource_address64 addr;  	acpi_status status;  	unsigned long flags; -	struct resource *root, *conflict; -	u64 start, end; +	u64 start, orig_end, end;  	status = resource_to_addr(acpi_res, &addr);  	if (!ACPI_SUCCESS(status))  		return AE_OK;  	if (addr.resource_type == ACPI_MEMORY_RANGE) { -		root = &iomem_resource;  		flags = IORESOURCE_MEM;  		if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)  			flags |= IORESOURCE_PREFETCH;  	} else if (addr.resource_type == ACPI_IO_RANGE) { -		root = &ioport_resource;  		flags = IORESOURCE_IO;  	} else  		return AE_OK;  	start = addr.minimum + addr.translation_offset; -	end = addr.maximum + addr.translation_offset; +	orig_end = end = addr.maximum + addr.translation_offset; + +	/* Exclude non-addressable range or non-addressable portion of range */ +	end = min(end, (u64)iomem_resource.end); +	if (end <= start) { +		dev_info(&info->bridge->dev, +			"host bridge window [%#llx-%#llx] " +			"(ignored, not CPU addressable)\n", start, orig_end); +		return AE_OK; +	} else if 
(orig_end != end) { +		dev_info(&info->bridge->dev, +			"host bridge window [%#llx-%#llx] " +			"([%#llx-%#llx] ignored, not CPU addressable)\n",  +			start, orig_end, end + 1, orig_end); +	}  	res = &info->res[info->res_num];  	res->name = info->name;  	res->flags = flags;  	res->start = start;  	res->end = end; -	res->child = NULL; +	info->res_offset[info->res_num] = addr.translation_offset; +	info->res_num++; -	if (!pci_use_crs) { +	if (!pci_use_crs)  		dev_printk(KERN_DEBUG, &info->bridge->dev,  			   "host bridge window %pR (ignored)\n", res); -		return AE_OK; + +	return AE_OK; +} + +static void coalesce_windows(struct pci_root_info *info, unsigned long type) +{ +	int i, j; +	struct resource *res1, *res2; + +	for (i = 0; i < info->res_num; i++) { +		res1 = &info->res[i]; +		if (!(res1->flags & type)) +			continue; + +		for (j = i + 1; j < info->res_num; j++) { +			res2 = &info->res[j]; +			if (!(res2->flags & type)) +				continue; + +			/* +			 * I don't like throwing away windows because then +			 * our resources no longer match the ACPI _CRS, but +			 * the kernel resource tree doesn't allow overlaps. 
+			 */ +			if (resource_overlaps(res1, res2)) { +				res2->start = min(res1->start, res2->start); +				res2->end = max(res1->end, res2->end); +				dev_info(&info->bridge->dev, +					 "host bridge window expanded to %pR; %pR ignored\n", +					 res2, res1); +				res1->flags = 0; +			} +		}  	} +} -	conflict = insert_resource_conflict(root, res); -	if (conflict) { -		dev_err(&info->bridge->dev, -			"address space collision: host bridge window %pR " -			"conflicts with %s %pR\n", -			res, conflict->name, conflict); -	} else { -		pci_bus_add_resource(info->bus, res, 0); -		info->res_num++; -		if (addr.translation_offset) -			dev_info(&info->bridge->dev, "host bridge window %pR " -				 "(PCI address [%#llx-%#llx])\n", -				 res, res->start - addr.translation_offset, -				 res->end - addr.translation_offset); +static void add_resources(struct pci_root_info *info, +			  struct list_head *resources) +{ +	int i; +	struct resource *res, *root, *conflict; + +	coalesce_windows(info, IORESOURCE_MEM); +	coalesce_windows(info, IORESOURCE_IO); + +	for (i = 0; i < info->res_num; i++) { +		res = &info->res[i]; + +		if (res->flags & IORESOURCE_MEM) +			root = &iomem_resource; +		else if (res->flags & IORESOURCE_IO) +			root = &ioport_resource;  		else +			continue; + +		conflict = insert_resource_conflict(root, res); +		if (conflict)  			dev_info(&info->bridge->dev, -				 "host bridge window %pR\n", res); +				 "ignoring host bridge window %pR (conflicts with %s %pR)\n", +				 res, conflict->name, conflict); +		else +			pci_add_resource_offset(resources, res, +					info->res_offset[i]);  	} -	return AE_OK;  } -static void -get_current_resources(struct acpi_device *device, int busnum, -			int domain, struct pci_bus *bus) +static void free_pci_root_info_res(struct pci_root_info *info) +{ +	kfree(info->res); +	info->res = NULL; +	kfree(info->res_offset); +	info->res_offset = NULL; +	info->res_num = 0; +} + +static void __release_pci_root_info(struct pci_root_info *info) +{ +	int i; +	
struct resource *res; + +	for (i = 0; i < info->res_num; i++) { +		res = &info->res[i]; + +		if (!res->parent) +			continue; + +		if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) +			continue; + +		release_resource(res); +	} + +	free_pci_root_info_res(info); + +	teardown_mcfg_map(info); + +	kfree(info); +} + +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ +	struct pci_root_info *info = bridge->release_data; + +	__release_pci_root_info(info); +} + +static void probe_pci_root_info(struct pci_root_info *info, +				struct acpi_device *device, +				int busnum, int domain)  { -	struct pci_root_info info;  	size_t size; -	if (pci_use_crs) -		pci_bus_remove_resources(bus); +	sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); +	info->bridge = device; -	info.bridge = device; -	info.bus = bus; -	info.res_num = 0; +	info->res_num = 0;  	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, -				&info); -	if (!info.res_num) +				info); +	if (!info->res_num)  		return; -	size = sizeof(*info.res) * info.res_num; -	info.res = kmalloc(size, GFP_KERNEL); -	if (!info.res) -		goto res_alloc_fail; +	size = sizeof(*info->res) * info->res_num; +	info->res = kzalloc(size, GFP_KERNEL); +	if (!info->res) { +		info->res_num = 0; +		return; +	} -	info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); -	if (!info.name) -		goto name_alloc_fail; +	size = sizeof(*info->res_offset) * info->res_num; +	info->res_num = 0; +	info->res_offset = kzalloc(size, GFP_KERNEL); +	if (!info->res_offset) { +		kfree(info->res); +		info->res = NULL; +		return; +	} -	info.res_num = 0;  	acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, -				&info); - -	return; - -name_alloc_fail: -	kfree(info.res); -res_alloc_fail: -	return; +				info);  } -struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)  {  	struct acpi_device *device = root->device; +	
struct pci_root_info *info;  	int domain = root->segment;  	int busnum = root->secondary.start; +	LIST_HEAD(resources);  	struct pci_bus *bus;  	struct pci_sysdata *sd;  	int node; -#ifdef CONFIG_ACPI_NUMA -	int pxm; -#endif + +	if (pci_ignore_seg) +		domain = 0;  	if (domain && !pci_domains_supported) {  		printk(KERN_WARNING "pci_bus %04x:%02x: " @@ -251,69 +488,92 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)  		return NULL;  	} -	node = -1; -#ifdef CONFIG_ACPI_NUMA -	pxm = acpi_get_pxm(device->handle); -	if (pxm >= 0) -		node = pxm_to_node(pxm); -	if (node != -1) -		set_mp_bus_to_node(busnum, node); -	else -#endif -		node = get_mp_bus_to_node(busnum); +	node = acpi_get_node(device->handle); +	if (node == NUMA_NO_NODE) { +		node = x86_pci_root_bus_node(busnum); +		if (node != 0 && node != NUMA_NO_NODE) +			dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", +				node); +	} -	if (node != -1 && !node_online(node)) -		node = -1; +	if (node != NUMA_NO_NODE && !node_online(node)) +		node = NUMA_NO_NODE; -	/* Allocate per-root-bus (not per bus) arch-specific data. -	 * TODO: leak; this memory is never freed. -	 * It's arguable whether it's worth the trouble to care. -	 */ -	sd = kzalloc(sizeof(*sd), GFP_KERNEL); -	if (!sd) { +	info = kzalloc(sizeof(*info), GFP_KERNEL); +	if (!info) {  		printk(KERN_WARNING "pci_bus %04x:%02x: "  		       "ignored (out of memory)\n", domain, busnum);  		return NULL;  	} +	sd = &info->sd;  	sd->domain = domain;  	sd->node = node; -	/* -	 * Maybe the desired pci bus has been already scanned. In such case -	 * it is unnecessary to scan the pci bus with the given domain,busnum. -	 */ +	sd->companion = device; +  	bus = pci_find_bus(domain, busnum);  	if (bus) {  		/* -		 * If the desired bus exits, the content of bus->sysdata will -		 * be replaced by sd. 
+		 * If the desired bus has been scanned already, replace +		 * its bus->sysdata.  		 */  		memcpy(bus->sysdata, sd, sizeof(*sd)); -		kfree(sd); +		kfree(info);  	} else { -		bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); +		probe_pci_root_info(info, device, busnum, domain); + +		/* insert busn res at first */ +		pci_add_resource(&resources,  &root->secondary); +		/* +		 * _CRS with no apertures is normal, so only fall back to +		 * defaults or native bridge info if we're ignoring _CRS. +		 */ +		if (pci_use_crs) +			add_resources(info, &resources); +		else { +			free_pci_root_info_res(info); +			x86_pci_root_bus_resources(busnum, &resources); +		} + +		if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, +				    (u8)root->secondary.end, root->mcfg_addr)) +			bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, +						  sd, &resources); +  		if (bus) { -			get_current_resources(device, busnum, domain, bus); -			bus->subordinate = pci_scan_child_bus(bus); +			pci_scan_child_bus(bus); +			pci_set_host_bridge_release( +				to_pci_host_bridge(bus->bridge), +				release_pci_root_info, info); +		} else { +			pci_free_resource_list(&resources); +			__release_pci_root_info(info);  		}  	} -	if (!bus) -		kfree(sd); +	/* After the PCI-E bus has been walked and all devices discovered, +	 * configure any settings of the fabric that might be necessary. 
+	 */ +	if (bus) { +		struct pci_bus *child; +		list_for_each_entry(child, &bus->children, node) +			pcie_bus_configure_settings(child); +	} -	if (bus && node != -1) { -#ifdef CONFIG_ACPI_NUMA -		if (pxm >= 0) -			dev_printk(KERN_DEBUG, &bus->dev, -				   "on NUMA node %d (pxm %d)\n", node, pxm); -#else +	if (bus && node != NUMA_NO_NODE)  		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); -#endif -	}  	return bus;  } +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ +	struct pci_sysdata *sd = bridge->bus->sysdata; + +	ACPI_COMPANION_SET(&bridge->dev, sd->companion); +	return 0; +} +  int __init pci_acpi_init(void)  {  	struct pci_dev *dev = NULL; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index fc1e8fe07e5..c20d2cc7ef6 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -4,71 +4,61 @@  #include <linux/cpu.h>  #include <linux/range.h> +#include <asm/amd_nb.h>  #include <asm/pci_x86.h>  #include <asm/pci-direct.h>  #include "bus_numa.h" -/* - * This discovers the pcibus <-> node mapping on AMD K8. - * also get peer root bus resource for io,mmio - */ +#define AMD_NB_F0_NODE_ID			0x60 +#define AMD_NB_F0_UNIT_ID			0x64 +#define AMD_NB_F1_CONFIG_MAP_REG		0xe0 -struct pci_hostbridge_probe { +#define RANGE_NUM				16 +#define AMD_NB_F1_CONFIG_MAP_RANGES		4 + +struct amd_hostbridge {  	u32 bus;  	u32 slot; -	u32 vendor;  	u32 device;  }; -static struct pci_hostbridge_probe pci_probes[] __initdata = { -	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, -	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, -	{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, -	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +/* + * IMPORTANT NOTE: + * hb_probes[] and early_root_info_init() is in maintenance mode. + * It only supports K8, Fam10h, Fam11h, and Fam15h_00h-0fh . + * Future processor will rely on information in ACPI. 
+ */ +static struct amd_hostbridge hb_probes[] __initdata = { +	{ 0, 0x18, 0x1100 }, /* K8 */ +	{ 0, 0x18, 0x1200 }, /* Family10h */ +	{ 0xff, 0, 0x1200 }, /* Family10h */ +	{ 0, 0x18, 0x1300 }, /* Family11h */ +	{ 0, 0x18, 0x1600 }, /* Family15h */  }; -static u64 __initdata fam10h_mmconf_start; -static u64 __initdata fam10h_mmconf_end; -static void __init get_pci_mmcfg_amd_fam10h_range(void) +static struct pci_root_info __init *find_pci_root_info(int node, int link)  { -	u32 address; -	u64 base, msr; -	unsigned segn_busn_bits; - -	/* assume all cpus from fam10h have mmconf */ -        if (boot_cpu_data.x86 < 0x10) -		return; - -	address = MSR_FAM10H_MMIO_CONF_BASE; -	rdmsrl(address, msr); - -	/* mmconfig is not enable */ -	if (!(msr & FAM10H_MMIO_CONF_ENABLE)) -		return; - -	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); +	struct pci_root_info *info; -	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & -			 FAM10H_MMIO_CONF_BUSRANGE_MASK; +	/* find the position */ +	list_for_each_entry(info, &pci_root_infos, list) +		if (info->node == node && info->link == link) +			return info; -	fam10h_mmconf_start = base; -	fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; +	return NULL;  } -#define RANGE_NUM 16 -  /** - * early_fill_mp_bus_to_node() + * early_root_info_init()   * called before pcibios_scan_root and pci_scan_bus - * fills the mp_bus_to_cpumask array based according to the LDT Bus Number - * Registers found in the K8 northbridge + * fills the mp_bus_to_cpumask array based according + * to the LDT Bus Number Registers found in the northbridge.   
*/ -static int __init early_fill_mp_bus_info(void) +static int __init early_root_info_init(void)  {  	int i; -	int j;  	unsigned bus;  	unsigned slot;  	int node; @@ -77,31 +67,35 @@ static int __init early_fill_mp_bus_info(void)  	int def_link;  	struct pci_root_info *info;  	u32 reg; -	struct resource *res;  	u64 start;  	u64 end;  	struct range range[RANGE_NUM];  	u64 val;  	u32 address;  	bool found; +	struct resource fam10h_mmconf_res, *fam10h_mmconf; +	u64 fam10h_mmconf_start; +	u64 fam10h_mmconf_end;  	if (!early_pci_allowed())  		return -1;  	found = false; -	for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { +	for (i = 0; i < ARRAY_SIZE(hb_probes); i++) {  		u32 id;  		u16 device;  		u16 vendor; -		bus = pci_probes[i].bus; -		slot = pci_probes[i].slot; +		bus = hb_probes[i].bus; +		slot = hb_probes[i].slot;  		id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); -  		vendor = id & 0xffff;  		device = (id>>16) & 0xffff; -		if (pci_probes[i].vendor == vendor && -		    pci_probes[i].device == device) { + +		if (vendor != PCI_VENDOR_ID_AMD) +			continue; + +		if (hb_probes[i].device == device) {  			found = true;  			break;  		} @@ -110,11 +104,16 @@ static int __init early_fill_mp_bus_info(void)  	if (!found)  		return 0; -	pci_root_num = 0; -	for (i = 0; i < 4; i++) { +	/* +	 * We should learn topology and routing information from _PXM and +	 * _CRS methods in the ACPI namespace.  We extract node numbers +	 * here to work around BIOSes that don't supply _PXM. 
+	 */ +	for (i = 0; i < AMD_NB_F1_CONFIG_MAP_RANGES; i++) {  		int min_bus;  		int max_bus; -		reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); +		reg = read_pci_config(bus, slot, 1, +				AMD_NB_F1_CONFIG_MAP_REG + (i << 2));  		/* Check if that register is enabled for bus range */  		if ((reg & 7) != 3) @@ -123,25 +122,26 @@ static int __init early_fill_mp_bus_info(void)  		min_bus = (reg >> 16) & 0xff;  		max_bus = (reg >> 24) & 0xff;  		node = (reg >> 4) & 0x07; -#ifdef CONFIG_NUMA -		for (j = min_bus; j <= max_bus; j++) -			set_mp_bus_to_node(j, node); -#endif  		link = (reg >> 8) & 0x03; -		info = &pci_root_info[pci_root_num]; -		info->bus_min = min_bus; -		info->bus_max = max_bus; -		info->node = node; -		info->link = link; -		sprintf(info->name, "PCI Bus #%02x", min_bus); -		pci_root_num++; +		info = alloc_pci_root_info(min_bus, max_bus, node, link);  	} +	/* +	 * The following code extracts routing information for use on old +	 * systems where Linux doesn't automatically use host bridge _CRS +	 * methods (or when the user specifies "pci=nocrs"). +	 * +	 * We only do this through Fam11h, because _CRS should be enough on +	 * newer systems. 
+	 */ +	if (boot_cpu_data.x86 > 0x11) +		return 0; +  	/* get the default node and link for left over res */ -	reg = read_pci_config(bus, slot, 0, 0x60); +	reg = read_pci_config(bus, slot, 0, AMD_NB_F0_NODE_ID);  	def_node = (reg >> 8) & 0x07; -	reg = read_pci_config(bus, slot, 0, 0x64); +	reg = read_pci_config(bus, slot, 0, AMD_NB_F0_UNIT_ID);  	def_link = (reg >> 8) & 0x03;  	memset(range, 0, sizeof(range)); @@ -158,16 +158,10 @@ static int __init early_fill_mp_bus_info(void)  		link = (reg >> 4) & 0x03;  		end = (reg & 0xfff000) | 0xfff; -		/* find the position */ -		for (j = 0; j < pci_root_num; j++) { -			info = &pci_root_info[j]; -			if (info->node == node && info->link == link) -				break; -		} -		if (j == pci_root_num) +		info = find_pci_root_info(node, link); +		if (!info)  			continue; /* not found */ -		info = &pci_root_info[j];  		printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",  		       node, link, start, end); @@ -179,13 +173,8 @@ static int __init early_fill_mp_bus_info(void)  	}  	/* add left over io port range to def node/link, [0, 0xffff] */  	/* find the position */ -	for (j = 0; j < pci_root_num; j++) { -		info = &pci_root_info[j]; -		if (info->node == def_node && info->link == def_link) -			break; -	} -	if (j < pci_root_num) { -		info = &pci_root_info[j]; +	info = find_pci_root_info(def_node, def_link); +	if (info) {  		for (i = 0; i < RANGE_NUM; i++) {  			if (!range[i].end)  				continue; @@ -210,12 +199,17 @@ static int __init early_fill_mp_bus_info(void)  		subtract_range(range, RANGE_NUM, 0, end);  	/* get mmconfig */ -	get_pci_mmcfg_amd_fam10h_range(); +	fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);  	/* need to take out mmconf range */ -	if (fam10h_mmconf_end) { -		printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); +	if (fam10h_mmconf) { +		printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); +		fam10h_mmconf_start = fam10h_mmconf->start; +		fam10h_mmconf_end = 
fam10h_mmconf->end;  		subtract_range(range, RANGE_NUM, fam10h_mmconf_start,  				 fam10h_mmconf_end + 1); +	} else { +		fam10h_mmconf_start = 0; +		fam10h_mmconf_end = 0;  	}  	/* mmio resource */ @@ -233,16 +227,10 @@ static int __init early_fill_mp_bus_info(void)  		end <<= 8;  		end |= 0xffff; -		/* find the position */ -		for (j = 0; j < pci_root_num; j++) { -			info = &pci_root_info[j]; -			if (info->node == node && info->link == link) -				break; -		} -		if (j == pci_root_num) -			continue; /* not found */ +		info = find_pci_root_info(node, link); -		info = &pci_root_info[j]; +		if (!info) +			continue;  		printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",  		       node, link, start, end); @@ -310,14 +298,8 @@ static int __init early_fill_mp_bus_info(void)  	 * add left over mmio range to def node/link ?  	 * that is tricky, just record range in from start_min to 4G  	 */ -	for (j = 0; j < pci_root_num; j++) { -		info = &pci_root_info[j]; -		if (info->node == def_node && info->link == def_link) -			break; -	} -	if (j < pci_root_num) { -		info = &pci_root_info[j]; - +	info = find_pci_root_info(def_node, def_link); +	if (info) {  		for (i = 0; i < RANGE_NUM; i++) {  			if (!range[i].end)  				continue; @@ -328,20 +310,16 @@ static int __init early_fill_mp_bus_info(void)  		}  	} -	for (i = 0; i < pci_root_num; i++) { -		int res_num; +	list_for_each_entry(info, &pci_root_infos, list) {  		int busnum; - -		info = &pci_root_info[i]; -		res_num = info->res_num; -		busnum = info->bus_min; -		printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", -		       info->bus_min, info->bus_max, info->node, info->link); -		for (j = 0; j < res_num; j++) { -			res = &info->res[j]; -			printk(KERN_DEBUG "bus: %02x index %x %pR\n", -				       busnum, j, res); -		} +		struct pci_root_res *root_res; + +		busnum = info->busn.start; +		printk(KERN_DEBUG "bus: %pR on node %x link %x\n", +		       &info->busn, info->node, info->link); +		list_for_each_entry(root_res, 
&info->resources, list) +			printk(KERN_DEBUG "bus: %02x %pR\n", +				       busnum, &root_res->res);  	}  	return 0; @@ -359,8 +337,8 @@ static void enable_pci_io_ecs(void *unused)  	}  } -static int __cpuinit amd_cpu_notify(struct notifier_block *self, -				    unsigned long action, void *hcpu) +static int amd_cpu_notify(struct notifier_block *self, unsigned long action, +			  void *hcpu)  {  	int cpu = (long)hcpu;  	switch (action) { @@ -374,22 +352,56 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self,  	return NOTIFY_OK;  } -static struct notifier_block __cpuinitdata amd_cpu_notifier = { +static struct notifier_block amd_cpu_notifier = {  	.notifier_call	= amd_cpu_notify,  }; +static void __init pci_enable_pci_io_ecs(void) +{ +#ifdef CONFIG_AMD_NB +	unsigned int i, n; + +	for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) { +		u8 bus = amd_nb_bus_dev_ranges[i].bus; +		u8 slot = amd_nb_bus_dev_ranges[i].dev_base; +		u8 limit = amd_nb_bus_dev_ranges[i].dev_limit; + +		for (; slot < limit; ++slot) { +			u32 val = read_pci_config(bus, slot, 3, 0); + +			if (!early_is_amd_nb(val)) +				continue; + +			val = read_pci_config(bus, slot, 3, 0x8c); +			if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) { +				val |= ENABLE_CF8_EXT_CFG >> 32; +				write_pci_config(bus, slot, 3, 0x8c, val); +			} +			++n; +		} +	} +#endif +} +  static int __init pci_io_ecs_init(void)  {  	int cpu;  	/* assume all cpus from fam10h have IO ECS */ -        if (boot_cpu_data.x86 < 0x10) +	if (boot_cpu_data.x86 < 0x10)  		return 0; -	register_cpu_notifier(&amd_cpu_notifier); +	/* Try the PCI method first. 
*/ +	if (early_pci_allowed()) +		pci_enable_pci_io_ecs(); + +	cpu_notifier_register_begin();  	for_each_online_cpu(cpu)  		amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,  			       (void *)(long)cpu); +	__register_cpu_notifier(&amd_cpu_notifier); +	cpu_notifier_register_done(); +  	pci_probe |= PCI_HAS_IO_ECS;  	return 0; @@ -400,7 +412,7 @@ static int __init amd_postcore_init(void)  	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)  		return 0; -	early_fill_mp_bus_info(); +	early_root_info_init();  	pci_io_ecs_init();  	return 0; diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 0846a5bbbfb..bb461cfd01a 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -9,37 +9,28 @@   * option) any later version.   */ +#include <linux/acpi.h>  #include <linux/delay.h>  #include <linux/dmi.h>  #include <linux/pci.h>  #include <linux/init.h>  #include <asm/pci_x86.h> +#include <asm/pci-direct.h>  #include "bus_numa.h" -static void __devinit cnb20le_res(struct pci_dev *dev) +static void __init cnb20le_res(u8 bus, u8 slot, u8 func)  {  	struct pci_root_info *info; +	struct pci_root_res *root_res;  	struct resource res;  	u16 word1, word2;  	u8 fbus, lbus; -	int i; - -	/* -	 * The x86_pci_root_bus_res_quirks() function already refuses to use -	 * this information if ACPI _CRS was used. Therefore, we don't bother -	 * checking if ACPI is enabled, and just generate the information -	 * for both the ACPI _CRS and no ACPI cases. 
-	 */ - -	info = &pci_root_info[pci_root_num]; -	pci_root_num++;  	/* read the PCI bus numbers */ -	pci_read_config_byte(dev, 0x44, &fbus); -	pci_read_config_byte(dev, 0x45, &lbus); -	info->bus_min = fbus; -	info->bus_max = lbus; +	fbus = read_pci_config_byte(bus, slot, func, 0x44); +	lbus = read_pci_config_byte(bus, slot, func, 0x45); +	info = alloc_pci_root_info(fbus, lbus, 0, 0);  	/*  	 * Add the legacy IDE ports on bus 0 @@ -56,8 +47,8 @@ static void __devinit cnb20le_res(struct pci_dev *dev)  	}  	/* read the non-prefetchable memory window */ -	pci_read_config_word(dev, 0xc0, &word1); -	pci_read_config_word(dev, 0xc2, &word2); +	word1 = read_pci_config_16(bus, slot, func, 0xc0); +	word2 = read_pci_config_16(bus, slot, func, 0xc2);  	if (word1 != word2) {  		res.start = (word1 << 16) | 0x0000;  		res.end   = (word2 << 16) | 0xffff; @@ -66,18 +57,18 @@ static void __devinit cnb20le_res(struct pci_dev *dev)  	}  	/* read the prefetchable memory window */ -	pci_read_config_word(dev, 0xc4, &word1); -	pci_read_config_word(dev, 0xc6, &word2); +	word1 = read_pci_config_16(bus, slot, func, 0xc4); +	word2 = read_pci_config_16(bus, slot, func, 0xc6);  	if (word1 != word2) { -		res.start = (word1 << 16) | 0x0000; -		res.end   = (word2 << 16) | 0xffff; +		res.start = ((resource_size_t) word1 << 16) | 0x0000; +		res.end   = ((resource_size_t) word2 << 16) | 0xffff;  		res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;  		update_res(info, res.start, res.end, res.flags, 0);  	}  	/* read the IO port window */ -	pci_read_config_word(dev, 0xd0, &word1); -	pci_read_config_word(dev, 0xd2, &word2); +	word1 = read_pci_config_16(bus, slot, func, 0xd0); +	word2 = read_pci_config_16(bus, slot, func, 0xd2);  	if (word1 != word2) {  		res.start = word1;  		res.end   = word2; @@ -89,13 +80,37 @@ static void __devinit cnb20le_res(struct pci_dev *dev)  	res.start = fbus;  	res.end   = lbus;  	res.flags = IORESOURCE_BUS; -	dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", -	
		    pci_domain_nr(dev->bus), &res); +	printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); -	for (i = 0; i < info->res_num; i++) -		dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); +	list_for_each_entry(root_res, &info->resources, list) +		printk(KERN_INFO "host bridge window %pR\n", &root_res->res);  } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, -			cnb20le_res); +static int __init broadcom_postcore_init(void) +{ +	u8 bus = 0, slot = 0; +	u32 id; +	u16 vendor, device; + +#ifdef CONFIG_ACPI +	/* +	 * We should get host bridge information from ACPI unless the BIOS +	 * doesn't support it. +	 */ +	if (acpi_os_get_root_pointer()) +		return 0; +#endif + +	id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); +	vendor = id & 0xffff; +	device = (id >> 16) & 0xffff; + +	if (vendor == PCI_VENDOR_ID_SERVERWORKS && +	    device == PCI_DEVICE_ID_SERVERWORKS_LE) { +		cnb20le_res(bus, slot, 0); +		cnb20le_res(bus, slot, 1); +	} +	return 0; +} +postcore_initcall(broadcom_postcore_init); diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 64a12288389..f3a2cfc1412 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -4,55 +4,107 @@  #include "bus_numa.h" -int pci_root_num; -struct pci_root_info pci_root_info[PCI_ROOT_NR]; +LIST_HEAD(pci_root_infos); -void x86_pci_root_bus_res_quirks(struct pci_bus *b) +static struct pci_root_info *x86_find_pci_root_info(int bus)  { -	int i; -	int j;  	struct pci_root_info *info; -	/* don't go for it if _CRS is used already */ -	if (b->resource[0] != &ioport_resource || -	    b->resource[1] != &iomem_resource) -		return; +	list_for_each_entry(info, &pci_root_infos, list) +		if (info->busn.start == bus) +			return info; -	if (!pci_root_num) -		return; +	return NULL; +} -	for (i = 0; i < pci_root_num; i++) { -		if (pci_root_info[i].bus_min == b->number) -			break; -	} +int x86_pci_root_bus_node(int bus) +{ +	struct pci_root_info *info = 
x86_find_pci_root_info(bus); -	if (i == pci_root_num) -		return; +	if (!info) +		return NUMA_NO_NODE; + +	return info->node; +} + +void x86_pci_root_bus_resources(int bus, struct list_head *resources) +{ +	struct pci_root_info *info = x86_find_pci_root_info(bus); +	struct pci_root_res *root_res; +	struct pci_host_bridge_window *window; +	bool found = false; + +	if (!info) +		goto default_resources; + +	printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", +	       bus); -	printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", -			b->number); +	/* already added by acpi ? */ +	list_for_each_entry(window, resources, list) +		if (window->res->flags & IORESOURCE_BUS) { +			found = true; +			break; +		} + +	if (!found) +		pci_add_resource(resources, &info->busn); -	pci_bus_remove_resources(b); -	info = &pci_root_info[i]; -	for (j = 0; j < info->res_num; j++) { +	list_for_each_entry(root_res, &info->resources, list) {  		struct resource *res;  		struct resource *root; -		res = &info->res[j]; -		pci_bus_add_resource(b, res, 0); +		res = &root_res->res; +		pci_add_resource(resources, res);  		if (res->flags & IORESOURCE_IO)  			root = &ioport_resource;  		else  			root = &iomem_resource;  		insert_resource(root, res);  	} +	return; + +default_resources: +	/* +	 * We don't have any host bridge aperture information from the +	 * "native host bridge drivers," e.g., amd_bus or broadcom_bus, +	 * so fall back to the defaults historically used by pci_create_bus(). 
+	 */ +	printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus); +	pci_add_resource(resources, &ioport_resource); +	pci_add_resource(resources, &iomem_resource);  } -void __devinit update_res(struct pci_root_info *info, resource_size_t start, -			  resource_size_t end, unsigned long flags, int merge) +struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, +						 int node, int link) +{ +	struct pci_root_info *info; + +	info = kzalloc(sizeof(*info), GFP_KERNEL); + +	if (!info) +		return info; + +	sprintf(info->name, "PCI Bus #%02x", bus_min); + +	INIT_LIST_HEAD(&info->resources); +	info->busn.name  = info->name; +	info->busn.start = bus_min; +	info->busn.end   = bus_max; +	info->busn.flags = IORESOURCE_BUS; +	info->node = node; +	info->link = link; + +	list_add_tail(&info->list, &pci_root_infos); + +	return info; +} + +void update_res(struct pci_root_info *info, resource_size_t start, +		resource_size_t end, unsigned long flags, int merge)  { -	int i;  	struct resource *res; +	struct pci_root_res *root_res;  	if (start > end)  		return; @@ -64,11 +116,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,  		goto addit;  	/* try to merge it with old one */ -	for (i = 0; i < info->res_num; i++) { +	list_for_each_entry(root_res, &info->resources, list) {  		resource_size_t final_start, final_end;  		resource_size_t common_start, common_end; -		res = &info->res[i]; +		res = &root_res->res;  		if (res->flags != flags)  			continue; @@ -88,14 +140,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,  addit:  	/* need to add that */ -	if (info->res_num >= RES_NUM) +	root_res = kzalloc(sizeof(*root_res), GFP_KERNEL); +	if (!root_res)  		return; -	res = &info->res[info->res_num]; +	res = &root_res->res;  	res->name = info->name;  	res->flags = flags;  	res->start = start;  	res->end = end; -	res->child = NULL; -	info->res_num++; + +	list_add_tail(&root_res->list, &info->resources); 
 } diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 804a4b40c31..ff8f65b0457 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -4,22 +4,23 @@   * sub bus (transparent) will use entres from 3 to store extra from   * root, so need to make sure we have enough slot there.   */ -#define RES_NUM 16 +struct pci_root_res { +	struct list_head list; +	struct resource res; +}; +  struct pci_root_info { +	struct list_head list;  	char name[12]; -	unsigned int res_num; -	struct resource res[RES_NUM]; -	int bus_min; -	int bus_max; +	struct list_head resources; +	struct resource busn;  	int node;  	int link;  }; -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -extern int pci_root_num; -extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; - +extern struct list_head pci_root_infos; +struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max, +						int node, int link);  extern void update_res(struct pci_root_info *info, resource_size_t start,  		      resource_size_t end, unsigned long flags, int merge);  #endif diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c new file mode 100644 index 00000000000..b914e20b5a0 --- /dev/null +++ b/arch/x86/pci/ce4100.c @@ -0,0 +1,331 @@ +/* + *  GPL LICENSE SUMMARY + * + *  Copyright(c) 2010 Intel Corporation. All rights reserved. + * + *  This program is free software; you can redistribute it and/or modify + *  it under the terms of version 2 of the GNU General Public License as + *  published by the Free Software Foundation. + * + *  This program is distributed in the hope that it will be useful, but + *  WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + *  General Public License for more details. 
+ * + *  You should have received a copy of the GNU General Public License + *  along with this program; if not, write to the Free Software + *  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + *  The full GNU General Public License is included in this distribution + *  in the file called LICENSE.GPL. + * + *  Contact Information: + *    Intel Corporation + *    2200 Mission College Blvd. + *    Santa Clara, CA  97052 + * + * This provides access methods for PCI registers that mis-behave on + * the CE4100. Each register can be assigned a private init, read and + * write routine. The exception to this is the bridge device.  The + * bridge device is the only device on bus zero (0) that requires any + * fixup so it is a special case ATM + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/ce4100.h> +#include <asm/pci_x86.h> + +struct sim_reg { +	u32 value; +	u32 mask; +}; + +struct sim_dev_reg { +	int dev_func; +	int reg; +	void (*init)(struct sim_dev_reg *reg); +	void (*read)(struct sim_dev_reg *reg, u32 *value); +	void (*write)(struct sim_dev_reg *reg, u32 value); +	struct sim_reg sim_reg; +}; + +struct sim_reg_op { +	void (*init)(struct sim_dev_reg *reg); +	void (*read)(struct sim_dev_reg *reg, u32 value); +	void (*write)(struct sim_dev_reg *reg, u32 value); +}; + +#define MB (1024 * 1024) +#define KB (1024) +#define SIZE_TO_MASK(size) (~(size - 1)) + +#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\ +{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ +	{0, SIZE_TO_MASK(size)} }, + +static void reg_init(struct sim_dev_reg *reg) +{ +	pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, +			      &reg->sim_reg.value); +} + +static void reg_read(struct sim_dev_reg *reg, u32 *value) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&pci_config_lock, flags); +	*value = reg->sim_reg.value; +	raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static 
void reg_write(struct sim_dev_reg *reg, u32 value) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&pci_config_lock, flags); +	reg->sim_reg.value = (value & reg->sim_reg.mask) | +		(reg->sim_reg.value & ~reg->sim_reg.mask); +	raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static void sata_reg_init(struct sim_dev_reg *reg) +{ +	pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4, +			      &reg->sim_reg.value); +	reg->sim_reg.value += 0x400; +} + +static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) +{ +	reg_read(reg, value); +	if (*value != reg->sim_reg.mask) +		*value |= 0x100; +} + +void sata_revid_init(struct sim_dev_reg *reg) +{ +	reg->sim_reg.value = 0x01060100; +	reg->sim_reg.mask = 0; +} + +static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) +{ +	reg_read(reg, value); +} + +static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&pci_config_lock, flags); +	/* force interrupt pin value to 0 */ +	*value = reg->sim_reg.value & 0xfff00ff; +	raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static struct sim_dev_reg bus1_fixups[] = { +	DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write) +	DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write) +	DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, 
reg_write) +	DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(11, 7, 0x3c, 256, reg_init, reg_noirq_read, reg_write) +	DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write) +	DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write) +	DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) +	DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) +	DEFINE_REG(14, 0, 0x8,  0, sata_revid_init, sata_revid_read, 0) +	DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write) +	DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write) +	DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write) +	DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write) +	DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write) +	DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write) +	DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, 
reg_read, reg_write) +	DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write) +	DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write) +	DEFINE_REG(16, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write) +	DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write) +	DEFINE_REG(18, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write) +}; + +static void __init init_sim_regs(void) +{ +	int i; + +	for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { +		if (bus1_fixups[i].init) +			bus1_fixups[i].init(&bus1_fixups[i]); +	} +} + +static inline void extract_bytes(u32 *value, int reg, int len) +{ +	uint32_t mask; + +	*value >>= ((reg & 3) * 8); +	mask = 0xFFFFFFFF >> ((4 - len) * 8); +	*value &= mask; +} + +int bridge_read(unsigned int devfn, int reg, int len, u32 *value) +{ +	u32 av_bridge_base, av_bridge_limit; +	int retval = 0; + +	switch (reg) { +	/* Make BARs appear to not request any memory. */ +	case PCI_BASE_ADDRESS_0: +	case PCI_BASE_ADDRESS_0 + 1: +	case PCI_BASE_ADDRESS_0 + 2: +	case PCI_BASE_ADDRESS_0 + 3: +		*value = 0; +		break; + +		/* Since subordinate bus number register is hardwired +		 * to zero and read only, so do the simulation. +		 */ +	case PCI_PRIMARY_BUS: +		if (len == 4) +			*value = 0x00010100; +		break; + +	case PCI_SUBORDINATE_BUS: +		*value = 1; +		break; + +	case PCI_MEMORY_BASE: +	case PCI_MEMORY_LIMIT: +		/* Get the A/V bridge base address. 
*/ +		pci_direct_conf1.read(0, 0, devfn, +				PCI_BASE_ADDRESS_0, 4, &av_bridge_base); + +		av_bridge_limit = av_bridge_base + (512*MB - 1); +		av_bridge_limit >>= 16; +		av_bridge_limit &= 0xFFF0; + +		av_bridge_base >>= 16; +		av_bridge_base &= 0xFFF0; + +		if (reg == PCI_MEMORY_LIMIT) +			*value = av_bridge_limit; +		else if (len == 2) +			*value = av_bridge_base; +		else +			*value = (av_bridge_limit << 16) | av_bridge_base; +		break; +		/* Make prefetchable memory limit smaller than prefetchable +		 * memory base, so not claim prefetchable memory space. +		 */ +	case PCI_PREF_MEMORY_BASE: +		*value = 0xFFF0; +		break; +	case PCI_PREF_MEMORY_LIMIT: +		*value = 0x0; +		break; +		/* Make IO limit smaller than IO base, so not claim IO space. */ +	case PCI_IO_BASE: +		*value = 0xF0; +		break; +	case PCI_IO_LIMIT: +		*value = 0; +		break; +	default: +		retval = 1; +	} +	return retval; +} + +static int ce4100_conf_read(unsigned int seg, unsigned int bus, +			    unsigned int devfn, int reg, int len, u32 *value) +{ +	int i; + +	WARN_ON(seg); +	if (bus == 1) { +		for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { +			if (bus1_fixups[i].dev_func == devfn && +			    bus1_fixups[i].reg == (reg & ~3) && +			    bus1_fixups[i].read) { +				bus1_fixups[i].read(&(bus1_fixups[i]), +						    value); +				extract_bytes(value, reg, len); +				return 0; +			} +		} +	} + +	if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && +	    !bridge_read(devfn, reg, len, value)) +		return 0; + +	return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); +} + +static int ce4100_conf_write(unsigned int seg, unsigned int bus, +			     unsigned int devfn, int reg, int len, u32 value) +{ +	int i; + +	WARN_ON(seg); +	if (bus == 1) { +		for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { +			if (bus1_fixups[i].dev_func == devfn && +			    bus1_fixups[i].reg == (reg & ~3) && +			    bus1_fixups[i].write) { +				bus1_fixups[i].write(&(bus1_fixups[i]), +						     value); +				return 0; +			} +		} +	} + +	/* 
Discard writes to A/V bridge BAR. */ +	if (bus == 0 && PCI_DEVFN(1, 0) == devfn && +	    ((reg & ~3) == PCI_BASE_ADDRESS_0)) +		return 0; + +	return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); +} + +static const struct pci_raw_ops ce4100_pci_conf = { +	.read =	ce4100_conf_read, +	.write = ce4100_conf_write, +}; + +int __init ce4100_pci_init(void) +{ +	init_sim_regs(); +	raw_pci_ops = &ce4100_pci_conf; +	/* Indicate caller that it should invoke pci_legacy_init() */ +	return 1; +} diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f7c8a399978..059a76c2973 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -6,22 +6,26 @@  #include <linux/sched.h>  #include <linux/pci.h> +#include <linux/pci-acpi.h>  #include <linux/ioport.h>  #include <linux/init.h>  #include <linux/dmi.h>  #include <linux/slab.h> +#include <asm-generic/pci-bridge.h>  #include <asm/acpi.h>  #include <asm/segment.h>  #include <asm/io.h>  #include <asm/smp.h>  #include <asm/pci_x86.h> +#include <asm/setup.h>  unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |  				PCI_PROBE_MMCONF;  unsigned int pci_early_dump_regs;  static int pci_bf_sort; +static int smbios_type_b1_flag;  int pci_routeirq;  int noioapicquirk;  #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS @@ -31,9 +35,8 @@ int noioapicreroute = 1;  #endif  int pcibios_last_bus = -1;  unsigned long pirq_table_addr; -struct pci_bus *pci_root_bus; -struct pci_raw_ops *raw_pci_ops; -struct pci_raw_ops *raw_pci_ext_ops; +const struct pci_raw_ops *__read_mostly raw_pci_ops; +const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;  int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,  						int reg, int len, u32 *val) @@ -78,14 +81,14 @@ struct pci_ops pci_root_ops = {   */  DEFINE_RAW_SPINLOCK(pci_config_lock); -static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) +static int can_skip_ioresource_align(const struct dmi_system_id *d)  {  	
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;  	printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);  	return 0;  } -static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = { +static const struct dmi_system_id can_skip_pciprobe_dmi_table[] = {  /*   * Systems where PCI IO resource ISA alignment can be skipped   * when the ISA enable bit in the bridge control is not set @@ -122,7 +125,7 @@ void __init dmi_check_skip_isa_align(void)  	dmi_check_system(can_skip_pciprobe_dmi_table);  } -static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) +static void pcibios_fixup_device_resources(struct pci_dev *dev)  {  	struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];  	struct resource *bar_r; @@ -159,24 +162,31 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)   *  are examined.   */ -void __devinit pcibios_fixup_bus(struct pci_bus *b) +void pcibios_fixup_bus(struct pci_bus *b)  {  	struct pci_dev *dev; -	/* root bus? */ -	if (!b->parent) -		x86_pci_root_bus_res_quirks(b);  	pci_read_bridge_bases(b);  	list_for_each_entry(dev, &b->devices, bus_list)  		pcibios_fixup_device_resources(dev);  } +void pcibios_add_bus(struct pci_bus *bus) +{ +	acpi_pci_add_bus(bus); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ +	acpi_pci_remove_bus(bus); +} +  /*   * Only use DMI information to set this if nothing was passed   * on the kernel command line (which was parsed earlier).   
*/ -static int __devinit set_bf_sort(const struct dmi_system_id *d) +static int set_bf_sort(const struct dmi_system_id *d)  {  	if (pci_bf_sort == pci_bf_sort_default) {  		pci_bf_sort = pci_dmi_bf; @@ -185,11 +195,44 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d)  	return 0;  } +static void read_dmi_type_b1(const struct dmi_header *dm, +				       void *private_data) +{ +	u8 *d = (u8 *)dm + 4; + +	if (dm->type != 0xB1) +		return; +	switch (((*(u32 *)d) >> 9) & 0x03) { +	case 0x00: +		printk(KERN_INFO "dmi type 0xB1 record - unknown flag\n"); +		break; +	case 0x01: /* set pci=bfsort */ +		smbios_type_b1_flag = 1; +		break; +	case 0x02: /* do not set pci=bfsort */ +		smbios_type_b1_flag = 2; +		break; +	default: +		break; +	} +} + +static int find_sort_method(const struct dmi_system_id *d) +{ +	dmi_walk(read_dmi_type_b1, NULL); + +	if (smbios_type_b1_flag == 1) { +		set_bf_sort(d); +		return 0; +	} +	return -1; +} +  /*   * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)   */  #ifdef __i386__ -static int __devinit assign_all_busses(const struct dmi_system_id *d) +static int assign_all_busses(const struct dmi_system_id *d)  {  	pci_probe |= PCI_ASSIGN_ALL_BUSSES;  	printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" @@ -198,7 +241,15 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)  }  #endif -static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { +static int set_scan_all(const struct dmi_system_id *d) +{ +	printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n", +	       d->ident); +	pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); +	return 0; +} + +static const struct dmi_system_id pciprobe_dmi_table[] = {  #ifdef __i386__  /*   * Laptops which need pci=assign-busses to see Cardbus cards @@ -253,6 +304,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {  		},  	},  	{ +		.callback = find_sort_method, +		.ident = "Dell System", +		.matches = { 
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), +		}, +	}, +	{  		.callback = set_bf_sort,  		.ident = "HP ProLiant BL20p G3",  		.matches = { @@ -382,6 +440,14 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {  			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),  		},  	}, +	{ +		.callback = set_scan_all, +		.ident = "Stratus/NEC ftServer", +		.matches = { +			DMI_MATCH(DMI_SYS_VENDOR, "Stratus"), +			DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), +		}, +	},  	{}  }; @@ -390,37 +456,27 @@ void __init dmi_check_pciprobe(void)  	dmi_check_system(pciprobe_dmi_table);  } -struct pci_bus * __devinit pcibios_scan_root(int busnum) +void pcibios_scan_root(int busnum)  { -	struct pci_bus *bus = NULL; +	struct pci_bus *bus;  	struct pci_sysdata *sd; +	LIST_HEAD(resources); -	while ((bus = pci_find_next_bus(bus)) != NULL) { -		if (bus->number == busnum) { -			/* Already scanned */ -			return bus; -		} -	} - -	/* Allocate per-root-bus (not per bus) arch-specific data. -	 * TODO: leak; this memory is never freed. -	 * It's arguable whether it's worth the trouble to care. 
-	 */  	sd = kzalloc(sizeof(*sd), GFP_KERNEL);  	if (!sd) { -		printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); -		return NULL; +		printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum); +		return;  	} - -	sd->node = get_mp_bus_to_node(busnum); - +	sd->node = x86_pci_root_bus_node(busnum); +	x86_pci_root_bus_resources(busnum, &resources);  	printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); -	bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); -	if (!bus) +	bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); +	if (!bus) { +		pci_free_resource_list(&resources);  		kfree(sd); - -	return bus; +	}  } +  void __init pcibios_set_cache_line_size(void)  {  	struct cpuinfo_x86 *c = &boot_cpu_data; @@ -456,7 +512,7 @@ int __init pcibios_init(void)  	return 0;  } -char * __devinit  pcibios_setup(char *str) +char * __init pcibios_setup(char *str)  {  	if (!strcmp(str, "off")) {  		pci_probe = 0; @@ -511,7 +567,6 @@ char * __devinit  pcibios_setup(char *str)  		pci_probe |= PCI_PROBE_NOEARLY;  		return NULL;  	} -#ifndef CONFIG_X86_VISWS  	else if (!strcmp(str, "usepirqmask")) {  		pci_probe |= PCI_USE_PIRQ_MASK;  		return NULL; @@ -521,9 +576,7 @@ char * __devinit  pcibios_setup(char *str)  	} else if (!strncmp(str, "lastbus=", 8)) {  		pcibios_last_bus = simple_strtol(str+8, NULL, 0);  		return NULL; -	} -#endif -	else if (!strcmp(str, "rom")) { +	} else if (!strcmp(str, "rom")) {  		pci_probe |= PCI_ASSIGN_ROMS;  		return NULL;  	} else if (!strcmp(str, "norom")) { @@ -570,6 +623,38 @@ unsigned int pcibios_assign_all_busses(void)  	return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 
1 : 0;  } +int pcibios_add_device(struct pci_dev *dev) +{ +	struct setup_data *data; +	struct pci_setup_rom *rom; +	u64 pa_data; + +	pa_data = boot_params.hdr.setup_data; +	while (pa_data) { +		data = ioremap(pa_data, sizeof(*rom)); +		if (!data) +			return -ENOMEM; + +		if (data->type == SETUP_PCI) { +			rom = (struct pci_setup_rom *)data; + +			if ((pci_domain_nr(dev->bus) == rom->segment) && +			    (dev->bus->number == rom->bus) && +			    (PCI_SLOT(dev->devfn) == rom->device) && +			    (PCI_FUNC(dev->devfn) == rom->function) && +			    (dev->vendor == rom->vendor) && +			    (dev->device == rom->devid)) { +				dev->rom = pa_data + +				      offsetof(struct pci_setup_rom, romdata); +				dev->romlen = rom->pcilen; +			} +		} +		pa_data = data->next; +		iounmap(data); +	} +	return 0; +} +  int pcibios_enable_device(struct pci_dev *dev, int mask)  {  	int err; @@ -588,107 +673,10 @@ void pcibios_disable_device (struct pci_dev *dev)  		pcibios_disable_irq(dev);  } -int pci_ext_cfg_avail(struct pci_dev *dev) +int pci_ext_cfg_avail(void)  {  	if (raw_pci_ext_ops)  		return 1;  	else  		return 0;  } - -struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) -{ -	struct pci_bus *bus = NULL; -	struct pci_sysdata *sd; - -	/* -	 * Allocate per-root-bus (not per bus) arch-specific data. -	 * TODO: leak; this memory is never freed. -	 * It's arguable whether it's worth the trouble to care. -	 */ -	sd = kzalloc(sizeof(*sd), GFP_KERNEL); -	if (!sd) { -		printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); -		return NULL; -	} -	sd->node = node; -	bus = pci_scan_bus(busno, ops, sd); -	if (!bus) -		kfree(sd); - -	return bus; -} - -struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) -{ -	return pci_scan_bus_on_node(busno, &pci_root_ops, -1); -} - -/* - * NUMA info for PCI busses - * - * Early arch code is responsible for filling in reasonable values here. - * A node id of "-1" means "use current node".  
In other words, if a bus - * has a -1 node id, it's not tightly coupled to any particular chunk - * of memory (as is the case on some Nehalem systems). - */ -#ifdef CONFIG_NUMA - -#define BUS_NR 256 - -#ifdef CONFIG_X86_64 - -static int mp_bus_to_node[BUS_NR] = { -	[0 ... BUS_NR - 1] = -1 -}; - -void set_mp_bus_to_node(int busnum, int node) -{ -	if (busnum >= 0 &&  busnum < BUS_NR) -		mp_bus_to_node[busnum] = node; -} - -int get_mp_bus_to_node(int busnum) -{ -	int node = -1; - -	if (busnum < 0 || busnum > (BUS_NR - 1)) -		return node; - -	node = mp_bus_to_node[busnum]; - -	/* -	 * let numa_node_id to decide it later in dma_alloc_pages -	 * if there is no ram on that node -	 */ -	if (node != -1 && !node_online(node)) -		node = -1; - -	return node; -} - -#else /* CONFIG_X86_32 */ - -static int mp_bus_to_node[BUS_NR] = { -	[0 ... BUS_NR - 1] = -1 -}; - -void set_mp_bus_to_node(int busnum, int node) -{ -	if (busnum >= 0 &&  busnum < BUS_NR) -	mp_bus_to_node[busnum] = (unsigned char) node; -} - -int get_mp_bus_to_node(int busnum) -{ -	int node; - -	if (busnum < 0 || busnum > (BUS_NR - 1)) -		return 0; -	node = mp_bus_to_node[busnum]; -	return node; -} - -#endif /* CONFIG_X86_32 */ - -#endif /* CONFIG_NUMA */ diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd33620b007..15460590b8c 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -22,7 +22,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus,  {  	unsigned long flags; -	if ((bus > 255) || (devfn > 255) || (reg > 4095)) { +	if (seg || (bus > 255) || (devfn > 255) || (reg > 4095)) {  		*value = -1;  		return -EINVAL;  	} @@ -53,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus,  {  	unsigned long flags; -	if ((bus > 255) || (devfn > 255) || (reg > 4095)) +	if (seg || (bus > 255) || (devfn > 255) || (reg > 4095))  		return -EINVAL;  	raw_spin_lock_irqsave(&pci_config_lock, flags); @@ -79,7 +79,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int 
bus,  #undef PCI_CONF1_ADDRESS -struct pci_raw_ops pci_direct_conf1 = { +const struct pci_raw_ops pci_direct_conf1 = {  	.read =		pci_conf1_read,  	.write =	pci_conf1_write,  }; @@ -97,6 +97,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus,  	unsigned long flags;  	int dev, fn; +	WARN_ON(seg);  	if ((bus > 255) || (devfn > 255) || (reg > 255)) {  		*value = -1;  		return -EINVAL; @@ -138,6 +139,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,  	unsigned long flags;  	int dev, fn; +	WARN_ON(seg);  	if ((bus > 255) || (devfn > 255) || (reg > 255))   		return -EINVAL; @@ -173,7 +175,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,  #undef PCI_CONF2_ADDRESS -struct pci_raw_ops pci_direct_conf2 = { +static const struct pci_raw_ops pci_direct_conf2 = {  	.read =		pci_conf2_read,  	.write =	pci_conf2_write,  }; @@ -189,7 +191,7 @@ struct pci_raw_ops pci_direct_conf2 = {   * This should be close to trivial, but it isn't, because there are buggy   * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.   
*/ -static int __init pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(const struct pci_raw_ops *o)  {  	u32 x = 0;  	int year, devfn; @@ -280,12 +282,9 @@ void __init pci_direct_init(int type)  int __init pci_direct_probe(void)  { -	struct resource *region, *region2; -  	if ((pci_probe & PCI_PROBE_CONF1) == 0)  		goto type2; -	region = request_region(0xCF8, 8, "PCI conf1"); -	if (!region) +	if (!request_region(0xCF8, 8, "PCI conf1"))  		goto type2;  	if (pci_check_type1()) { @@ -293,16 +292,14 @@ int __init pci_direct_probe(void)  		port_cf9_safe = true;  		return 1;  	} -	release_resource(region); +	release_region(0xCF8, 8);   type2:  	if ((pci_probe & PCI_PROBE_CONF2) == 0)  		return 0; -	region = request_region(0xCF8, 4, "PCI conf2"); -	if (!region) +	if (!request_region(0xCF8, 4, "PCI conf2"))  		return 0; -	region2 = request_region(0xC000, 0x1000, "PCI conf2"); -	if (!region2) +	if (!request_region(0xC000, 0x1000, "PCI conf2"))  		goto fail2;  	if (pci_check_type2()) { @@ -311,8 +308,8 @@ int __init pci_direct_probe(void)  		return 2;  	} -	release_resource(region2); +	release_region(0xC000, 0x1000);   fail2: -	release_resource(region); +	release_region(0xCF8, 4);  	return 0;  } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 6dd89555fbf..b5e60268d93 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -5,10 +5,11 @@  #include <linux/delay.h>  #include <linux/dmi.h>  #include <linux/pci.h> -#include <linux/init.h> +#include <linux/vgaarb.h> +#include <asm/hpet.h>  #include <asm/pci_x86.h> -static void __devinit pci_fixup_i450nx(struct pci_dev *d) +static void pci_fixup_i450nx(struct pci_dev *d)  {  	/*  	 * i450NX -- Find and scan all secondary buses on all PXB's. 
@@ -25,15 +26,15 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)  		dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno,  			suba, subb);  		if (busno) -			pci_scan_bus_with_sysdata(busno);	/* Bus A */ +			pcibios_scan_root(busno);	/* Bus A */  		if (suba < subb) -			pci_scan_bus_with_sysdata(suba+1);	/* Bus B */ +			pcibios_scan_root(suba+1);	/* Bus B */  	}  	pcibios_last_bus = -1;  }  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); -static void __devinit pci_fixup_i450gx(struct pci_dev *d) +static void pci_fixup_i450gx(struct pci_dev *d)  {  	/*  	 * i450GX and i450KX -- Find and scan all secondary buses. @@ -42,12 +43,12 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)  	u8 busno;  	pci_read_config_byte(d, 0x4a, &busno);  	dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno); -	pci_scan_bus_with_sysdata(busno); +	pcibios_scan_root(busno);  	pcibios_last_bus = -1;  }  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); -static void __devinit  pci_fixup_umc_ide(struct pci_dev *d) +static void pci_fixup_umc_ide(struct pci_dev *d)  {  	/*  	 * UM8886BF IDE controller sets region type bits incorrectly, @@ -61,7 +62,7 @@ static void __devinit  pci_fixup_umc_ide(struct pci_dev *d)  }  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide); -static void __devinit  pci_fixup_ncr53c810(struct pci_dev *d) +static void pci_fixup_ncr53c810(struct pci_dev *d)  {  	/*  	 * NCR 53C810 returns class code 0 (at least on some systems). 
@@ -74,7 +75,7 @@ static void __devinit  pci_fixup_ncr53c810(struct pci_dev *d)  }  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810); -static void __devinit  pci_fixup_latency(struct pci_dev *d) +static void pci_fixup_latency(struct pci_dev *d)  {  	/*  	 *  SiS 5597 and 5598 chipsets require latency timer set to @@ -86,7 +87,7 @@ static void __devinit  pci_fixup_latency(struct pci_dev *d)  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency); -static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) +static void pci_fixup_piix4_acpi(struct pci_dev *d)  {  	/*  	 * PIIX4 ACPI device: hardwired IRQ9 @@ -162,13 +163,13 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_   * system to PCI bus no matter what are their window settings, so they are   * "transparent" (or subtractive decoding) from programmers point of view.   */ -static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) +static void pci_fixup_transparent_bridge(struct pci_dev *dev)  { -	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && -	    (dev->device & 0xff00) == 0x2400) +	if ((dev->device & 0xff00) == 0x2400)  		dev->transparent = 1;  } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, +			 PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge);  /*   * Fixup for C1 Halt Disconnect problem on nForce2 systems. 
@@ -230,7 +231,7 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh  	offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)];  	if ((offset) && (where == offset)) -		value = value & 0xfffffffc; +		value = value & ~PCI_EXP_LNKCTL_ASPMC;  	return raw_pci_write(pci_domain_nr(bus), bus->number,  						devfn, where, size, value); @@ -251,7 +252,7 @@ static struct pci_ops quirk_pcie_aspm_ops = {   */  static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)  { -	int cap_base, i; +	int i;  	struct pci_bus  *pbus;  	struct pci_dev *dev; @@ -277,7 +278,7 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)  		for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)  			quirk_aspm_offset[i] = 0; -		pbus->ops = pbus->parent->ops; +		pci_bus_set_ops(pbus, pbus->parent->ops);  	} else {  		/*  		 * If devices are attached to the root port at power-up or @@ -285,13 +286,15 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)  		 * each root port to save the register offsets and replace the  		 * bus ops.  		 
*/ -		list_for_each_entry(dev, &pbus->devices, bus_list) { +		list_for_each_entry(dev, &pbus->devices, bus_list)  			/* There are 0 to 8 devices attached to this bus */ -			cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP); -			quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = cap_base + 0x10; -		} -		pbus->ops = &quirk_pcie_aspm_ops; +			quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = +				dev->pcie_cap + PCI_EXP_LNKCTL; + +		pci_bus_set_ops(pbus, &quirk_pcie_aspm_ops); +		dev_info(&pbus->dev, "writes to ASPM control bits will be ignored\n");  	} +  }  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA,	pcie_rootport_aspm_quirk);  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PA1,	pcie_rootport_aspm_quirk); @@ -311,20 +314,18 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_MCH_PC1,	pcie_r   * IORESOURCE_ROM_SHADOW is used to associate the boot video   * card with this copy. On laptops this copy has to be used since   * the main ROM may be compressed or combined with another image. - * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW - * is marked here since the boot video device will be the only enabled - * video device at this point. + * See pci_map_rom() for use of this flag. Before marking the device + * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set + * by either arch cde or vga-arbitration, if so only apply the fixup to this + * already determined primary video card.   */ -static void __devinit pci_fixup_video(struct pci_dev *pdev) +static void pci_fixup_video(struct pci_dev *pdev)  {  	struct pci_dev *bridge;  	struct pci_bus *bus;  	u16 config; -	if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) -		return; -  	/* Is VGA routed to us? */  	bus = pdev->bus;  	while (bus) { @@ -337,9 +338,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)  		 * type BRIDGE, or CARDBUS. Host to PCI controllers use  		 * PCI header type NORMAL.  		 
*/ -		if (bridge -		    && ((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE) -		       || (bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) { +		if (bridge && (pci_is_bridge(bridge))) {  			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,  						&config);  			if (!(config & PCI_BRIDGE_CTL_VGA)) @@ -347,16 +346,20 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)  		}  		bus = bus->parent;  	} -	pci_read_config_word(pdev, PCI_COMMAND, &config); -	if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { -		pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; -		dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); +	if (!vga_default_device() || pdev == vga_default_device()) { +		pci_read_config_word(pdev, PCI_COMMAND, &config); +		if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { +			pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; +			dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); +			vga_set_default_device(pdev); +		}  	}  } -DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, +				PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); -static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { +static const struct dmi_system_id msi_k8t_dmi_table[] = {  	{  		.ident = "MSI-K8T-Neo2Fir",  		.matches = { @@ -377,7 +380,7 @@ static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {   * The soundcard is only enabled, if the mainborad is identified   * via DMI-tables and the soundcard is detected to be off.   
*/ -static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev) +static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)  {  	unsigned char val;  	if (!dmi_check_system(msi_k8t_dmi_table)) @@ -413,7 +416,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,   */  static u16 toshiba_line_size; -static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = { +static const struct dmi_system_id toshiba_ohci1394_dmi_table[] = {  	{  		.ident = "Toshiba PS5 based laptop",  		.matches = { @@ -438,7 +441,7 @@ static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] =  	{ }  }; -static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +static void pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)  {  	if (!dmi_check_system(toshiba_ohci1394_dmi_table))  		return; /* only applies to certain Toshibas (so far) */ @@ -449,7 +452,7 @@ static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032,  			 pci_pre_fixup_toshiba_ohci1394); -static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) +static void pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)  {  	if (!dmi_check_system(toshiba_ohci1394_dmi_table))  		return; /* only applies to certain Toshibas (so far) */ @@ -487,7 +490,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,   * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller:   * prevent update of the BAR0, which doesn't look like a normal BAR.   
*/ -static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) +static void pci_siemens_interrupt_controller(struct pci_dev *dev)  {  	dev->resource[0].flags |= IORESOURCE_PCI_FIXED;  } @@ -521,3 +524,33 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)  	}  }  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); + +#ifdef CONFIG_HPET_TIMER +static void sb600_hpet_quirk(struct pci_dev *dev) +{ +	struct resource *r = &dev->resource[1]; + +	if (r->flags & IORESOURCE_MEM && r->start == hpet_address) { +		r->flags |= IORESOURCE_PCI_FIXED; +		dev_info(&dev->dev, "reg 0x14 contains HPET; making it immovable\n"); +	} +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, 0x4385, sb600_hpet_quirk); +#endif + +/* + * Twinhead H12Y needs us to block out a region otherwise we map devices + * there and any access kills the box. + * + *   See: https://bugzilla.kernel.org/show_bug.cgi?id=10231 + * + * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) + */ +static void twinhead_reserve_killing_zone(struct pci_dev *dev) +{ +        if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { +                pr_info("Reserving memory on Twinhead H12Y\n"); +                request_mem_region(0xFFB00000, 0x100000, "twinhead"); +        } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index c4bb261c106..a19ed92e74e 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -26,6 +26,7 @@  #include <linux/types.h>  #include <linux/kernel.h> +#include <linux/export.h>  #include <linux/pci.h>  #include <linux/init.h>  #include <linux/ioport.h> @@ -38,6 +39,95 @@  #include <asm/io_apic.h> +/* + * This list of dynamic mappings is for temporarily maintaining + * original BIOS BAR addresses for possible reinstatement. 
+ */ +struct pcibios_fwaddrmap { +	struct list_head list; +	struct pci_dev *dev; +	resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; +}; + +static LIST_HEAD(pcibios_fwaddrmappings); +static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); +static bool pcibios_fw_addr_done; + +/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ +static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) +{ +	struct pcibios_fwaddrmap *map; + +	WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); + +	list_for_each_entry(map, &pcibios_fwaddrmappings, list) +		if (map->dev == dev) +			return map; + +	return NULL; +} + +static void +pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr) +{ +	unsigned long flags; +	struct pcibios_fwaddrmap *map; + +	if (pcibios_fw_addr_done) +		return; + +	spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); +	map = pcibios_fwaddrmap_lookup(dev); +	if (!map) { +		spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); +		map = kzalloc(sizeof(*map), GFP_KERNEL); +		if (!map) +			return; + +		map->dev = pci_dev_get(dev); +		map->fw_addr[idx] = fw_addr; +		INIT_LIST_HEAD(&map->list); + +		spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); +		list_add_tail(&map->list, &pcibios_fwaddrmappings); +	} else +		map->fw_addr[idx] = fw_addr; +	spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); +} + +resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) +{ +	unsigned long flags; +	struct pcibios_fwaddrmap *map; +	resource_size_t fw_addr = 0; + +	if (pcibios_fw_addr_done) +		return 0; + +	spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); +	map = pcibios_fwaddrmap_lookup(dev); +	if (map) +		fw_addr = map->fw_addr[idx]; +	spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); + +	return fw_addr; +} + +static void __init pcibios_fw_addr_list_del(void) +{ +	unsigned long flags; +	struct pcibios_fwaddrmap *entry, *next; + +	spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); +	list_for_each_entry_safe(entry, 
next, &pcibios_fwaddrmappings, list) { +		list_del(&entry->list); +		pci_dev_put(entry->dev); +		kfree(entry); +	} +	spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); +	pcibios_fw_addr_done = true; +} +  static int  skip_isa_ioresource_align(struct pci_dev *dev) { @@ -65,21 +155,13 @@ pcibios_align_resource(void *data, const struct resource *res,  			resource_size_t size, resource_size_t align)  {  	struct pci_dev *dev = data; -	resource_size_t start = round_down(res->end - size + 1, align); +	resource_size_t start = res->start;  	if (res->flags & IORESOURCE_IO) { - -		/* -		 * If we're avoiding ISA aliases, the largest contiguous I/O -		 * port space is 256 bytes.  Clearing bits 9 and 10 preserves -		 * all 256-byte and smaller alignments, so the result will -		 * still be correctly aligned. -		 */ -		if (!skip_isa_ioresource_align(dev)) -			start &= ~0x300; -	} else if (res->flags & IORESOURCE_MEM) { -		if (start < BIOS_END) -			start = res->end;	/* fail; no space */ +		if (skip_isa_ioresource_align(dev)) +			return start; +		if (start & 0x300) +			start = (start + 0x3ff) & ~0x3ff;  	}  	return start;  } @@ -119,46 +201,48 @@ EXPORT_SYMBOL(pcibios_align_resource);   *	    as well.   */ -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +static void pcibios_allocate_bridge_resources(struct pci_dev *dev)  { -	struct pci_bus *bus; -	struct pci_dev *dev;  	int idx;  	struct resource *r; -	/* Depth-First Search on bus tree */ -	list_for_each_entry(bus, bus_list, node) { -		if ((dev = bus->self)) { -			for (idx = PCI_BRIDGE_RESOURCES; -			    idx < PCI_NUM_RESOURCES; idx++) { -				r = &dev->resource[idx]; -				if (!r->flags) -					continue; -				if (!r->start || -				    pci_claim_resource(dev, idx) < 0) { -					/* -					 * Something is wrong with the region. -					 * Invalidate the resource to prevent -					 * child resource allocations in this -					 * range. 
-					 */ -					r->start = r->end = 0; -					r->flags = 0; -				} -			} +	for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { +		r = &dev->resource[idx]; +		if (!r->flags) +			continue; +		if (r->parent)	/* Already allocated */ +			continue; +		if (!r->start || pci_claim_resource(dev, idx) < 0) { +			/* +			 * Something is wrong with the region. +			 * Invalidate the resource to prevent +			 * child resource allocations in this +			 * range. +			 */ +			r->start = r->end = 0; +			r->flags = 0;  		} -		pcibios_allocate_bus_resources(&bus->children);  	}  } +static void pcibios_allocate_bus_resources(struct pci_bus *bus) +{ +	struct pci_bus *child; + +	/* Depth-First Search on bus tree */ +	if (bus->self) +		pcibios_allocate_bridge_resources(bus->self); +	list_for_each_entry(child, &bus->children, node) +		pcibios_allocate_bus_resources(child); +} +  struct pci_check_idx_range {  	int start;  	int end;  }; -static void __init pcibios_allocate_resources(int pass) +static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass)  { -	struct pci_dev *dev = NULL;  	int idx, disabled, i;  	u16 command;  	struct resource *r; @@ -170,14 +254,13 @@ static void __init pcibios_allocate_resources(int pass)  #endif  	}; -	for_each_pci_dev(dev) { -		pci_read_config_word(dev, PCI_COMMAND, &command); -		for (i = 0; i < ARRAY_SIZE(idx_range); i++) +	pci_read_config_word(dev, PCI_COMMAND, &command); +	for (i = 0; i < ARRAY_SIZE(idx_range); i++)  		for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) {  			r = &dev->resource[idx]; -			if (r->parent)		/* Already allocated */ +			if (r->parent)	/* Already allocated */  				continue; -			if (!r->start)		/* Address not assigned at all */ +			if (!r->start)	/* Address not assigned at all */  				continue;  			if (r->flags & IORESOURCE_IO)  				disabled = !(command & PCI_COMMAND_IO); @@ -188,71 +271,94 @@ static void __init pcibios_allocate_resources(int pass)  					"BAR %d: reserving %pr (d=%d, p=%d)\n",  	
				idx, r, disabled, pass);  				if (pci_claim_resource(dev, idx) < 0) { -					/* We'll assign a new address later */ -					dev->fw_addr[idx] = r->start; -					r->end -= r->start; -					r->start = 0; +					if (r->flags & IORESOURCE_PCI_FIXED) { +						dev_info(&dev->dev, "BAR %d %pR is immovable\n", +							 idx, r); +					} else { +						/* We'll assign a new address later */ +						pcibios_save_fw_addr(dev, +								idx, r->start); +						r->end -= r->start; +						r->start = 0; +					}  				}  			}  		} -		if (!pass) { -			r = &dev->resource[PCI_ROM_RESOURCE]; -			if (r->flags & IORESOURCE_ROM_ENABLE) { -				/* Turn the ROM off, leave the resource region, -				 * but keep it unregistered. */ -				u32 reg; -				dev_dbg(&dev->dev, "disabling ROM %pR\n", r); -				r->flags &= ~IORESOURCE_ROM_ENABLE; -				pci_read_config_dword(dev, -						dev->rom_base_reg, &reg); -				pci_write_config_dword(dev, dev->rom_base_reg, +	if (!pass) { +		r = &dev->resource[PCI_ROM_RESOURCE]; +		if (r->flags & IORESOURCE_ROM_ENABLE) { +			/* Turn the ROM off, leave the resource region, +			 * but keep it unregistered. 
*/ +			u32 reg; +			dev_dbg(&dev->dev, "disabling ROM %pR\n", r); +			r->flags &= ~IORESOURCE_ROM_ENABLE; +			pci_read_config_dword(dev, dev->rom_base_reg, &reg); +			pci_write_config_dword(dev, dev->rom_base_reg,  						reg & ~PCI_ROM_ADDRESS_ENABLE); -			}  		}  	}  } -static int __init pcibios_assign_resources(void) +static void pcibios_allocate_resources(struct pci_bus *bus, int pass) +{ +	struct pci_dev *dev; +	struct pci_bus *child; + +	list_for_each_entry(dev, &bus->devices, bus_list) { +		pcibios_allocate_dev_resources(dev, pass); + +		child = dev->subordinate; +		if (child) +			pcibios_allocate_resources(child, pass); +	} +} + +static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev)  { -	struct pci_dev *dev = NULL;  	struct resource *r; -	if (!(pci_probe & PCI_ASSIGN_ROMS)) { -		/* -		 * Try to use BIOS settings for ROMs, otherwise let -		 * pci_assign_unassigned_resources() allocate the new -		 * addresses. -		 */ -		for_each_pci_dev(dev) { -			r = &dev->resource[PCI_ROM_RESOURCE]; -			if (!r->flags || !r->start) -				continue; -			if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { -				r->end -= r->start; -				r->start = 0; -			} -		} +	/* +	 * Try to use BIOS settings for ROMs, otherwise let +	 * pci_assign_unassigned_resources() allocate the new +	 * addresses. 
+	 */ +	r = &dev->resource[PCI_ROM_RESOURCE]; +	if (!r->flags || !r->start) +		return; +	if (r->parent) /* Already allocated */ +		return; + +	if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { +		r->end -= r->start; +		r->start = 0;  	} +} +static void pcibios_allocate_rom_resources(struct pci_bus *bus) +{ +	struct pci_dev *dev; +	struct pci_bus *child; -	pci_assign_unassigned_resources(); +	list_for_each_entry(dev, &bus->devices, bus_list) { +		pcibios_allocate_dev_rom_resource(dev); -	return 0; +		child = dev->subordinate; +		if (child) +			pcibios_allocate_rom_resources(child); +	}  } -void __init pcibios_resource_survey(void) +static int __init pcibios_assign_resources(void)  { -	DBG("PCI: Allocating resources\n"); -	pcibios_allocate_bus_resources(&pci_root_buses); -	pcibios_allocate_resources(0); -	pcibios_allocate_resources(1); +	struct pci_bus *bus; -	e820_reserve_resources_late(); -	/* -	 * Insert the IO APIC resources after PCI initialization has -	 * occured to handle IO APICS that are mapped in on a BAR in -	 * PCI space, but before trying to assign unassigned pci res. -	 */ -	ioapic_insert_resources(); +	if (!(pci_probe & PCI_ASSIGN_ROMS)) +		list_for_each_entry(bus, &pci_root_buses, node) +			pcibios_allocate_rom_resources(bus); + +	pci_assign_unassigned_resources(); +	pcibios_fw_addr_list_del(); + +	return 0;  }  /** @@ -261,24 +367,40 @@ void __init pcibios_resource_survey(void)   */  fs_initcall(pcibios_assign_resources); -/* - *  If we set up a device for bus mastering, we need to check the latency - *  timer as certain crappy BIOSes forget to set it properly. 
- */ -unsigned int pcibios_max_latency = 255; +void pcibios_resource_survey_bus(struct pci_bus *bus) +{ +	dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n"); -void pcibios_set_master(struct pci_dev *dev) +	pcibios_allocate_bus_resources(bus); + +	pcibios_allocate_resources(bus, 0); +	pcibios_allocate_resources(bus, 1); + +	if (!(pci_probe & PCI_ASSIGN_ROMS)) +		pcibios_allocate_rom_resources(bus); +} + +void __init pcibios_resource_survey(void)  { -	u8 lat; -	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); -	if (lat < 16) -		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; -	else if (lat > pcibios_max_latency) -		lat = pcibios_max_latency; -	else -		return; -	dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); -	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +	struct pci_bus *bus; + +	DBG("PCI: Allocating resources\n"); + +	list_for_each_entry(bus, &pci_root_buses, node) +		pcibios_allocate_bus_resources(bus); + +	list_for_each_entry(bus, &pci_root_buses, node) +		pcibios_allocate_resources(bus, 0); +	list_for_each_entry(bus, &pci_root_buses, node) +		pcibios_allocate_resources(bus, 1); + +	e820_reserve_resources_late(); +	/* +	 * Insert the IO APIC resources after PCI initialization has +	 * occurred to handle IO APICS that are mapped in on a BAR in +	 * PCI space, but before trying to assign unassigned pci res. +	 */ +	ioapic_insert_resources();  }  static const struct vm_operations_struct pci_mmap_ops = { @@ -312,7 +434,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,  		/*  		 * ioremap() and ioremap_nocache() defaults to UC MINUS for now.  		 * To avoid attribute conflicts, request UC MINUS here -		 * aswell. +		 * as well.  		 
*/  		prot |= _PAGE_CACHE_UC_MINUS; diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/intel_mid_pci.c index cb29191cee5..84b9d672843 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -1,5 +1,5 @@  /* - * Moorestown PCI support + * Intel MID PCI support   *   Copyright (c) 2008 Intel Corporation   *     Jesse Barnes <jesse.barnes@intel.com>   * @@ -23,14 +23,15 @@  #include <linux/ioport.h>  #include <linux/init.h>  #include <linux/dmi.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/smp.h> -#include <asm/acpi.h>  #include <asm/segment.h> -#include <asm/io.h> -#include <asm/smp.h>  #include <asm/pci_x86.h>  #include <asm/hw_irq.h>  #include <asm/io_apic.h> +#include <asm/intel-mid.h>  #define PCIE_CAP_OFFSET	0x100 @@ -43,6 +44,8 @@  #define PCI_FIXED_BAR_4_SIZE	0x14  #define PCI_FIXED_BAR_5_SIZE	0x1c +static int pci_soc_mode; +  /**   * fixed_bar_cap - return the offset of the fixed BAR cap if found   * @bus: PCI bus @@ -139,7 +142,8 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,   */  static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)  { -	/* This is a workaround for A0 LNC bug where PCI status register does +	/* +	 * This is a workaround for A0 LNC bug where PCI status register does  	 * not have new CAP bit set. can not be written by SW either.  	 *  	 * PCI header type in real LNC indicates a single function device, this @@ -147,10 +151,12 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)  	 * shim. Therefore, use the header type in shim instead.  	 
*/  	if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) -		return 0; -	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) -		return 1; -	return 0; /* langwell on others */ +		return false; +	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) +				|| devfn == PCI_DEVFN(0, 0) +				|| devfn == PCI_DEVFN(3, 0))) +		return true; +	return false; /* Langwell on others */  }  static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, @@ -168,7 +174,8 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,  {  	int offset; -	/* On MRST, there is no PCI ROM BAR, this will cause a subsequent read +	/* +	 * On MRST, there is no PCI ROM BAR, this will cause a subsequent read  	 * to ROM BAR return 0 then being ignored.  	 */  	if (where == PCI_ROM_ADDRESS) @@ -199,55 +206,95 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,  			       where, size, value);  } -static int mrst_pci_irq_enable(struct pci_dev *dev) +static int intel_mid_pci_irq_enable(struct pci_dev *dev)  {  	u8 pin;  	struct io_apic_irq_attr irq_attr;  	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); -	/* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to +	/* +	 * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to  	 * IOAPIC RTE entries, so we just enable RTE for the device.  	 
*/  	irq_attr.ioapic = mp_find_ioapic(dev->irq);  	irq_attr.ioapic_pin = dev->irq;  	irq_attr.trigger = 1; /* level */ -	irq_attr.polarity = 1; /* active low */ +	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) +		irq_attr.polarity = 0; /* active high */ +	else +		irq_attr.polarity = 1; /* active low */  	io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);  	return 0;  } -struct pci_ops pci_mrst_ops = { +struct pci_ops intel_mid_pci_ops = {  	.read = pci_read,  	.write = pci_write,  };  /** - * pci_mrst_init - installs pci_mrst_ops + * intel_mid_pci_init - installs intel_mid_pci_ops   *   * Moorestown has an interesting PCI implementation (see above).   * Called when the early platform detection installs it.   */ -int __init pci_mrst_init(void) +int __init intel_mid_pci_init(void)  { -	printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); +	pr_info("Intel MID platform detected, using MID PCI ops\n");  	pci_mmcfg_late_init(); -	pcibios_enable_irq = mrst_pci_irq_enable; -	pci_root_ops = pci_mrst_ops; +	pcibios_enable_irq = intel_mid_pci_irq_enable; +	pci_root_ops = intel_mid_pci_ops; +	pci_soc_mode = 1;  	/* Continue with standard init */  	return 1;  }  /* + * Langwell devices are not true PCI devices; they are not subject to 10 ms + * d3 to d0 delay required by PCI spec. + */ +static void pci_d3delay_fixup(struct pci_dev *dev) +{ +	/* +	 * PCI fixups are effectively decided compile time. If we have a dual +	 * SoC/non-SoC kernel we don't want to mangle d3 on non-SoC devices. +	 */ +	if (!pci_soc_mode) +		return; +	/* +	 * True PCI devices in Lincroft should allow type 1 access, the rest +	 * are Langwell fake PCI devices. 
+	 */ +	if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) +		return; +	dev->d3_delay = 0; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); + +static void mrst_power_off_unused_dev(struct pci_dev *dev) +{ +	pci_set_power_state(dev, PCI_D3hot); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); + +/*   * Langwell devices reside at fixed offsets, don't try to move them.   */ -static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) +static void pci_fixed_bar_fixup(struct pci_dev *dev)  {  	unsigned long offset;  	u32 size;  	int i; +	if (!pci_soc_mode) +		return; +  	/* Must have extended configuration space */  	if (dev->cfg_size < PCIE_CAP_OFFSET + 4)  		return; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9f9bfb705cf..84112f55dd7 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -136,13 +136,9 @@ static void __init pirq_peer_trick(void)  		busmap[e->bus] = 1;  	}  	for (i = 1; i < 256; i++) { -		int node;  		if (!busmap[i] || pci_find_bus(0, i))  			continue; -		node = get_mp_bus_to_node(i); -		if (pci_scan_bus_on_node(i, &pci_root_ops, node)) -			printk(KERN_INFO "PCI: Discovered primary peer " -			       "bus %02x [IRQ]\n", i); +		pcibios_scan_root(i);  	}  	pcibios_last_bus = -1;  } @@ -589,28 +585,28 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route  	case PCI_DEVICE_ID_INTEL_ICH10_1:  	case PCI_DEVICE_ID_INTEL_ICH10_2:  	case PCI_DEVICE_ID_INTEL_ICH10_3: -	case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: +	case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: +	case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1:  		r->name = 
"PIIX/ICH";  		r->get = pirq_piix_get;  		r->set = pirq_piix_set;  		return 1;  	} -	if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) &&  -		(device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) { +	if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN &&  +	     device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)  +	||  (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN &&  +	     device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) +	||  (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && +	     device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) +	||  (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && +	     device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) {  		r->name = "PIIX/ICH";  		r->get = pirq_piix_get;  		r->set = pirq_piix_set;  		return 1;  	} -	if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) &&  -		(device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) { -		r->name = "PIIX/ICH"; -		r->get = pirq_piix_get; -		r->set = pirq_piix_set; -		return 1; -	}  	return 0;  } diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index c89266be604..5b662c0faf8 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -2,6 +2,7 @@   * legacy.c - traditional, old school PCI bus probing   */  #include <linux/init.h> +#include <linux/export.h>  #include <linux/pci.h>  #include <asm/pci_x86.h> @@ -9,7 +10,7 @@   * Discover remaining PCI buses in case there are peer host bridges.   * We use the number of last PCI bus provided by the PCI BIOS.   
*/ -static void __devinit pcibios_fixup_peer_bridges(void) +static void pcibios_fixup_peer_bridges(void)  {  	int n; @@ -29,29 +30,24 @@ int __init pci_legacy_init(void)  	}  	printk("PCI: Probing PCI hardware\n"); -	pci_root_bus = pcibios_scan_root(0); -	if (pci_root_bus) -		pci_bus_add_devices(pci_root_bus); - +	pcibios_scan_root(0);  	return 0;  } -void __devinit pcibios_scan_specific_bus(int busn) +void pcibios_scan_specific_bus(int busn)  {  	int devfn; -	long node;  	u32 l;  	if (pci_find_bus(0, busn))  		return; -	node = get_mp_bus_to_node(busn);  	for (devfn = 0; devfn < 256; devfn += 8) {  		if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&  		    l != 0x0000 && l != 0xffff) {  			DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);  			printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); -			pci_scan_bus_on_node(busn, &pci_root_ops, node); +			pcibios_scan_root(busn);  			return;  		}  	} diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index e282886616a..248642f4bab 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -12,11 +12,12 @@  #include <linux/pci.h>  #include <linux/init.h> -#include <linux/acpi.h>  #include <linux/sfi_acpi.h>  #include <linux/bitmap.h>  #include <linux/dmi.h>  #include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/rculist.h>  #include <asm/e820.h>  #include <asm/pci_x86.h>  #include <asm/acpi.h> @@ -24,7 +25,9 @@  #define PREFIX "PCI: "  /* Indicate if the mmcfg resources have been placed into the resource table. 
*/ -static int __initdata pci_mmcfg_resources_inserted; +static bool pci_mmcfg_running_state; +static bool pci_mmcfg_arch_init_failed; +static DEFINE_MUTEX(pci_mmcfg_lock);  LIST_HEAD(pci_mmcfg_list); @@ -45,24 +48,24 @@ static __init void free_all_mmcfg(void)  		pci_mmconfig_remove(cfg);  } -static __init void list_add_sorted(struct pci_mmcfg_region *new) +static void list_add_sorted(struct pci_mmcfg_region *new)  {  	struct pci_mmcfg_region *cfg;  	/* keep list sorted by segment and starting bus number */ -	list_for_each_entry(cfg, &pci_mmcfg_list, list) { +	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {  		if (cfg->segment > new->segment ||  		    (cfg->segment == new->segment &&  		     cfg->start_bus >= new->start_bus)) { -			list_add_tail(&new->list, &cfg->list); +			list_add_tail_rcu(&new->list, &cfg->list);  			return;  		}  	} -	list_add_tail(&new->list, &pci_mmcfg_list); +	list_add_tail_rcu(&new->list, &pci_mmcfg_list);  } -static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, -							int end, u64 addr) +static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start, +						   int end, u64 addr)  {  	struct pci_mmcfg_region *new;  	struct resource *res; @@ -79,8 +82,6 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,  	new->start_bus = start;  	new->end_bus = end; -	list_add_sorted(new); -  	res = &new->res;  	res->start = addr + PCI_MMCFG_BUS_OFFSET(start);  	res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; @@ -89,9 +90,25 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,  		 "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);  	res->name = new->name; -	printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " -	       "%pR (base %#lx)\n", segment, start, end, &new->res, -	       (unsigned long) addr); +	return new; +} + +static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, +							int end, u64 
addr) +{ +	struct pci_mmcfg_region *new; + +	new = pci_mmconfig_alloc(segment, start, end, addr); +	if (new) { +		mutex_lock(&pci_mmcfg_lock); +		list_add_sorted(new); +		mutex_unlock(&pci_mmcfg_lock); + +		pr_info(PREFIX +		       "MMCONFIG for domain %04x [bus %02x-%02x] at %pR " +		       "(base %#lx)\n", +		       segment, start, end, &new->res, (unsigned long)addr); +	}  	return new;  } @@ -100,7 +117,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)  {  	struct pci_mmcfg_region *cfg; -	list_for_each_entry(cfg, &pci_mmcfg_list, list) +	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)  		if (cfg->segment == segment &&  		    cfg->start_bus <= bus && bus <= cfg->end_bus)  			return cfg; @@ -343,8 +360,7 @@ static int __init pci_mmcfg_check_hostbridge(void)  			name = pci_mmcfg_probes[i].probe();  		if (name) -			printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", -			       name); +			pr_info(PREFIX "%s with MMCONFIG support\n", name);  	}  	/* some end_bus_number is crazy, fix it */ @@ -353,19 +369,7 @@ static int __init pci_mmcfg_check_hostbridge(void)  	return !list_empty(&pci_mmcfg_list);  } -static void __init pci_mmcfg_insert_resources(void) -{ -	struct pci_mmcfg_region *cfg; - -	list_for_each_entry(cfg, &pci_mmcfg_list, list) -		insert_resource(&iomem_resource, &cfg->res); - -	/* Mark that the resources have been inserted. 
*/ -	pci_mmcfg_resources_inserted = 1; -} - -static acpi_status __init check_mcfg_resource(struct acpi_resource *res, -					      void *data) +static acpi_status check_mcfg_resource(struct acpi_resource *res, void *data)  {  	struct resource *mcfg_res = data;  	struct acpi_resource_address64 address; @@ -401,8 +405,8 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,  	return AE_OK;  } -static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, -		void *context, void **rv) +static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl, +					void *context, void **rv)  {  	struct resource *mcfg_res = context; @@ -415,7 +419,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl,  	return AE_OK;  } -static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) +static int is_acpi_reserved(u64 start, u64 end, unsigned not_used)  {  	struct resource mcfg_res; @@ -434,13 +438,15 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)  typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); -static int __init is_mmconf_reserved(check_reserved_t is_reserved, -				    struct pci_mmcfg_region *cfg, int with_e820) +static int __ref is_mmconf_reserved(check_reserved_t is_reserved, +				    struct pci_mmcfg_region *cfg, +				    struct device *dev, int with_e820)  {  	u64 addr = cfg->res.start;  	u64 size = resource_size(&cfg->res);  	u64 old_size = size; -	int valid = 0, num_buses; +	int num_buses; +	char *method = with_e820 ? "E820" : "ACPI motherboard resources";  	while (!is_reserved(addr, addr + size, E820_RESERVED)) {  		size >>= 1; @@ -448,30 +454,76 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,  			break;  	} -	if (size >= (16UL<<20) || size == old_size) { -		printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", -		       &cfg->res, -		       with_e820 ? 
"E820" : "ACPI motherboard resources"); -		valid = 1; - -		if (old_size != size) { -			/* update end_bus */ -			cfg->end_bus = cfg->start_bus + ((size>>20) - 1); -			num_buses = cfg->end_bus - cfg->start_bus + 1; -			cfg->res.end = cfg->res.start + -			    PCI_MMCFG_BUS_OFFSET(num_buses) - 1; -			snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, -				 "PCI MMCONFIG %04x [bus %02x-%02x]", -				 cfg->segment, cfg->start_bus, cfg->end_bus); -			printk(KERN_INFO PREFIX -			       "MMCONFIG for %04x [bus%02x-%02x] " -			       "at %pR (base %#lx) (size reduced!)\n", -			       cfg->segment, cfg->start_bus, cfg->end_bus, -			       &cfg->res, (unsigned long) cfg->address); -		} +	if (size < (16UL<<20) && size != old_size) +		return 0; + +	if (dev) +		dev_info(dev, "MMCONFIG at %pR reserved in %s\n", +			 &cfg->res, method); +	else +		pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", +		       &cfg->res, method); + +	if (old_size != size) { +		/* update end_bus */ +		cfg->end_bus = cfg->start_bus + ((size>>20) - 1); +		num_buses = cfg->end_bus - cfg->start_bus + 1; +		cfg->res.end = cfg->res.start + +		    PCI_MMCFG_BUS_OFFSET(num_buses) - 1; +		snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, +			 "PCI MMCONFIG %04x [bus %02x-%02x]", +			 cfg->segment, cfg->start_bus, cfg->end_bus); + +		if (dev) +			dev_info(dev, +				"MMCONFIG " +				"at %pR (base %#lx) (size reduced!)\n", +				&cfg->res, (unsigned long) cfg->address); +		else +			pr_info(PREFIX +				"MMCONFIG for %04x [bus%02x-%02x] " +				"at %pR (base %#lx) (size reduced!)\n", +				cfg->segment, cfg->start_bus, cfg->end_bus, +				&cfg->res, (unsigned long) cfg->address);  	} -	return valid; +	return 1; +} + +static int __ref pci_mmcfg_check_reserved(struct device *dev, +		  struct pci_mmcfg_region *cfg, int early) +{ +	if (!early && !acpi_disabled) { +		if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) +			return 1; + +		if (dev) +			dev_info(dev, FW_INFO +				 "MMCONFIG at %pR not reserved in " +				 "ACPI 
motherboard resources\n", +				 &cfg->res); +		else +			pr_info(FW_INFO PREFIX +			       "MMCONFIG at %pR not reserved in " +			       "ACPI motherboard resources\n", +			       &cfg->res); +	} + +	/* +	 * e820_all_mapped() is marked as __init. +	 * All entries from ACPI MCFG table have been checked at boot time. +	 * For MCFG information constructed from hotpluggable host bridge's +	 * _CBA method, just assume it's reserved. +	 */ +	if (pci_mmcfg_running_state) +		return 1; + +	/* Don't try to do this check unless configuration +	   type 1 is available. how about type 2 ?*/ +	if (raw_pci_ops) +		return is_mmconf_reserved(e820_all_mapped, cfg, dev, 1); + +	return 0;  }  static void __init pci_mmcfg_reject_broken(int early) @@ -479,38 +531,14 @@ static void __init pci_mmcfg_reject_broken(int early)  	struct pci_mmcfg_region *cfg;  	list_for_each_entry(cfg, &pci_mmcfg_list, list) { -		int valid = 0; - -		if (!early && !acpi_disabled) { -			valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); - -			if (valid) -				continue; -			else -				printk(KERN_ERR FW_BUG PREFIX -				       "MMCONFIG at %pR not reserved in " -				       "ACPI motherboard resources\n", -				       &cfg->res); +		if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) { +			pr_info(PREFIX "not using MMCONFIG\n"); +			free_all_mmcfg(); +			return;  		} - -		/* Don't try to do this check unless configuration -		   type 1 is available. 
how about type 2 ?*/ -		if (raw_pci_ops) -			valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); - -		if (!valid) -			goto reject;  	} - -	return; - -reject: -	printk(KERN_INFO PREFIX "not using MMCONFIG\n"); -	free_all_mmcfg();  } -static int __initdata known_bridge; -  static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,  					struct acpi_mcfg_allocation *cfg)  { @@ -519,7 +547,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,  	if (cfg->address < 0xFFFFFFFF)  		return 0; -	if (!strcmp(mcfg->header.oem_id, "SGI")) +	if (!strncmp(mcfg->header.oem_id, "SGI", 3))  		return 0;  	if (mcfg->header.revision >= 1) { @@ -528,7 +556,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,  			return 0;  	} -	printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " +	pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx "  	       "is above 4GB, ignored\n", cfg->pci_segment,  	       cfg->start_bus_number, cfg->end_bus_number, cfg->address);  	return -EINVAL; @@ -553,9 +581,9 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)  	while (i >= sizeof(struct acpi_mcfg_allocation)) {  		entries++;  		i -= sizeof(struct acpi_mcfg_allocation); -	}; +	}  	if (entries == 0) { -		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); +		pr_err(PREFIX "MMCONFIG has no entries\n");  		return -ENODEV;  	} @@ -569,8 +597,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)  		if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,  				   cfg->end_bus_number, cfg->address) == NULL) { -			printk(KERN_WARNING PREFIX -			       "no memory for MCFG entries\n"); +			pr_warn(PREFIX "no memory for MCFG entries\n");  			free_all_mmcfg();  			return -ENOMEM;  		} @@ -581,61 +608,65 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)  static void __init __pci_mmcfg_init(int early)  { -	/* MMCONFIG disabled */ -	if ((pci_probe & PCI_PROBE_MMCONF) == 0) -		
return; - -	/* MMCONFIG already enabled */ -	if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) +	pci_mmcfg_reject_broken(early); +	if (list_empty(&pci_mmcfg_list))  		return; -	/* for late to exit */ -	if (known_bridge) -		return; +	if (pcibios_last_bus < 0) { +		const struct pci_mmcfg_region *cfg; -	if (early) { -		if (pci_mmcfg_check_hostbridge()) -			known_bridge = 1; +		list_for_each_entry(cfg, &pci_mmcfg_list, list) { +			if (cfg->segment) +				break; +			pcibios_last_bus = cfg->end_bus; +		}  	} -	if (!known_bridge) -		acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); - -	pci_mmcfg_reject_broken(early); - -	if (list_empty(&pci_mmcfg_list)) -		return; -  	if (pci_mmcfg_arch_init())  		pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;  	else { -		/* -		 * Signal not to attempt to insert mmcfg resources because -		 * the architecture mmcfg setup could not initialize. -		 */ -		pci_mmcfg_resources_inserted = 1; +		free_all_mmcfg(); +		pci_mmcfg_arch_init_failed = true;  	}  } +static int __initdata known_bridge; +  void __init pci_mmcfg_early_init(void)  { -	__pci_mmcfg_init(1); +	if (pci_probe & PCI_PROBE_MMCONF) { +		if (pci_mmcfg_check_hostbridge()) +			known_bridge = 1; +		else +			acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); +		__pci_mmcfg_init(1); +	}  }  void __init pci_mmcfg_late_init(void)  { -	__pci_mmcfg_init(0); +	/* MMCONFIG disabled */ +	if ((pci_probe & PCI_PROBE_MMCONF) == 0) +		return; + +	if (known_bridge) +		return; + +	/* MMCONFIG hasn't been enabled yet, try again */ +	if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) { +		acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); +		__pci_mmcfg_init(0); +	}  }  static int __init pci_mmcfg_late_insert_resources(void)  { -	/* -	 * If resources are already inserted or we are not using MMCONFIG, -	 * don't insert the resources. 
-	 */ -	if ((pci_mmcfg_resources_inserted == 1) || -	    (pci_probe & PCI_PROBE_MMCONF) == 0 || -	    list_empty(&pci_mmcfg_list)) +	struct pci_mmcfg_region *cfg; + +	pci_mmcfg_running_state = true; + +	/* If we are not using MMCONFIG, don't insert the resources. */ +	if ((pci_probe & PCI_PROBE_MMCONF) == 0)  		return 1;  	/* @@ -643,7 +674,9 @@ static int __init pci_mmcfg_late_insert_resources(void)  	 * marked so it won't cause request errors when __request_region is  	 * called.  	 */ -	pci_mmcfg_insert_resources(); +	list_for_each_entry(cfg, &pci_mmcfg_list, list) +		if (!cfg->res.parent) +			insert_resource(&iomem_resource, &cfg->res);  	return 0;  } @@ -654,3 +687,100 @@ static int __init pci_mmcfg_late_insert_resources(void)   * with other system resources.   */  late_initcall(pci_mmcfg_late_insert_resources); + +/* Add MMCFG information for host bridges */ +int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, +			phys_addr_t addr) +{ +	int rc; +	struct resource *tmp = NULL; +	struct pci_mmcfg_region *cfg; + +	if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) +		return -ENODEV; + +	if (start > end) +		return -EINVAL; + +	mutex_lock(&pci_mmcfg_lock); +	cfg = pci_mmconfig_lookup(seg, start); +	if (cfg) { +		if (cfg->end_bus < end) +			dev_info(dev, FW_INFO +				 "MMCONFIG for " +				 "domain %04x [bus %02x-%02x] " +				 "only partially covers this bridge\n", +				  cfg->segment, cfg->start_bus, cfg->end_bus); +		mutex_unlock(&pci_mmcfg_lock); +		return -EEXIST; +	} + +	if (!addr) { +		mutex_unlock(&pci_mmcfg_lock); +		return -EINVAL; +	} + +	rc = -EBUSY; +	cfg = pci_mmconfig_alloc(seg, start, end, addr); +	if (cfg == NULL) { +		dev_warn(dev, "fail to add MMCONFIG (out of memory)\n"); +		rc = -ENOMEM; +	} else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) { +		dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n", +			 &cfg->res); +	} else { +		/* Insert resource if it's not in boot stage */ +		if (pci_mmcfg_running_state) +			
tmp = insert_resource_conflict(&iomem_resource, +						       &cfg->res); + +		if (tmp) { +			dev_warn(dev, +				 "MMCONFIG %pR conflicts with " +				 "%s %pR\n", +				 &cfg->res, tmp->name, tmp); +		} else if (pci_mmcfg_arch_map(cfg)) { +			dev_warn(dev, "fail to map MMCONFIG %pR.\n", +				 &cfg->res); +		} else { +			list_add_sorted(cfg); +			dev_info(dev, "MMCONFIG at %pR (base %#lx)\n", +				 &cfg->res, (unsigned long)addr); +			cfg = NULL; +			rc = 0; +		} +	} + +	if (cfg) { +		if (cfg->res.parent) +			release_resource(&cfg->res); +		kfree(cfg); +	} + +	mutex_unlock(&pci_mmcfg_lock); + +	return rc; +} + +/* Delete MMCFG information for host bridges */ +int pci_mmconfig_delete(u16 seg, u8 start, u8 end) +{ +	struct pci_mmcfg_region *cfg; + +	mutex_lock(&pci_mmcfg_lock); +	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) +		if (cfg->segment == seg && cfg->start_bus == start && +		    cfg->end_bus == end) { +			list_del_rcu(&cfg->list); +			synchronize_rcu(); +			pci_mmcfg_arch_unmap(cfg); +			if (cfg->res.parent) +				release_resource(&cfg->res); +			mutex_unlock(&pci_mmcfg_lock); +			kfree(cfg); +			return 0; +		} +	mutex_unlock(&pci_mmcfg_lock); + +	return -ENOENT; +} diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index a3d9c54792a..43984bc1665 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -11,9 +11,9 @@  #include <linux/pci.h>  #include <linux/init.h> +#include <linux/rcupdate.h>  #include <asm/e820.h>  #include <asm/pci_x86.h> -#include <acpi/acpi.h>  /* Assume systems with more busses have correct MCFG */  #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) @@ -60,9 +60,12 @@ err:		*value = -1;  		return -EINVAL;  	} +	rcu_read_lock();  	base = get_base_addr(seg, bus, devfn); -	if (!base) +	if (!base) { +		rcu_read_unlock();  		goto err; +	}  	raw_spin_lock_irqsave(&pci_config_lock, flags); @@ -80,6 +83,7 @@ err:		*value = -1;  		break;  	}  	
raw_spin_unlock_irqrestore(&pci_config_lock, flags); +	rcu_read_unlock();  	return 0;  } @@ -93,9 +97,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,  	if ((bus > 255) || (devfn > 255) || (reg > 4095))  		return -EINVAL; +	rcu_read_lock();  	base = get_base_addr(seg, bus, devfn); -	if (!base) +	if (!base) { +		rcu_read_unlock();  		return -EINVAL; +	}  	raw_spin_lock_irqsave(&pci_config_lock, flags); @@ -113,11 +120,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,  		break;  	}  	raw_spin_unlock_irqrestore(&pci_config_lock, flags); +	rcu_read_unlock();  	return 0;  } -static struct pci_raw_ops pci_mmcfg = { +const struct pci_raw_ops pci_mmcfg = {  	.read =		pci_mmcfg_read,  	.write =	pci_mmcfg_write,  }; @@ -132,3 +140,18 @@ int __init pci_mmcfg_arch_init(void)  void __init pci_mmcfg_arch_free(void)  {  } + +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) +{ +	return 0; +} + +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) +{ +	unsigned long flags; + +	/* Invalidate the cached mmcfg map entry. 
*/ +	raw_spin_lock_irqsave(&pci_config_lock, flags); +	mmcfg_last_accessed_device = 0; +	raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index e783841bd1d..bea52496aea 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -9,6 +9,7 @@  #include <linux/init.h>  #include <linux/acpi.h>  #include <linux/bitmap.h> +#include <linux/rcupdate.h>  #include <asm/e820.h>  #include <asm/pci_x86.h> @@ -34,9 +35,12 @@ err:		*value = -1;  		return -EINVAL;  	} +	rcu_read_lock();  	addr = pci_dev_base(seg, bus, devfn); -	if (!addr) +	if (!addr) { +		rcu_read_unlock();  		goto err; +	}  	switch (len) {  	case 1: @@ -49,6 +53,7 @@ err:		*value = -1;  		*value = mmio_config_readl(addr + reg);  		break;  	} +	rcu_read_unlock();  	return 0;  } @@ -62,9 +67,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,  	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))  		return -EINVAL; +	rcu_read_lock();  	addr = pci_dev_base(seg, bus, devfn); -	if (!addr) +	if (!addr) { +		rcu_read_unlock();  		return -EINVAL; +	}  	switch (len) {  	case 1: @@ -77,16 +85,17 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,  		mmio_config_writel(addr + reg, value);  		break;  	} +	rcu_read_unlock();  	return 0;  } -static struct pci_raw_ops pci_mmcfg = { +const struct pci_raw_ops pci_mmcfg = {  	.read =		pci_mmcfg_read,  	.write =	pci_mmcfg_write,  }; -static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) +static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)  {  	void __iomem *addr;  	u64 start, size; @@ -105,16 +114,14 @@ int __init pci_mmcfg_arch_init(void)  {  	struct pci_mmcfg_region *cfg; -	list_for_each_entry(cfg, &pci_mmcfg_list, list) { -		cfg->virt = mcfg_ioremap(cfg); -		if (!cfg->virt) { -			printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", -			       &cfg->res); +	list_for_each_entry(cfg, &pci_mmcfg_list, list) +		if 
(pci_mmcfg_arch_map(cfg)) {  			pci_mmcfg_arch_free();  			return 0;  		} -	} +  	raw_pci_ext_ops = &pci_mmcfg; +  	return 1;  } @@ -122,10 +129,25 @@ void __init pci_mmcfg_arch_free(void)  {  	struct pci_mmcfg_region *cfg; -	list_for_each_entry(cfg, &pci_mmcfg_list, list) { -		if (cfg->virt) { -			iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); -			cfg->virt = NULL; -		} +	list_for_each_entry(cfg, &pci_mmcfg_list, list) +		pci_mmcfg_arch_unmap(cfg); +} + +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) +{ +	cfg->virt = mcfg_ioremap(cfg); +	if (!cfg->virt) { +		pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res); +		return -ENOMEM; +	} + +	return 0; +} + +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) +{ +	if (cfg && cfg->virt) { +		iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); +		cfg->virt = NULL;  	}  } diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c new file mode 100644 index 00000000000..7307d9d12d1 --- /dev/null +++ b/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to <support@numascale.com> + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include <linux/pci.h> +#include <asm/pci_x86.h> + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ +	struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + +	if (cfg && cfg->virt) +		return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); +	return NULL; +} + +static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus, +			  unsigned int devfn, int reg, int len, u32 *value) +{ +	char __iomem *addr; + +	/* Why do we have this when nobody checks it. How about a BUG()!? -AK */ +	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { +err:		*value = -1; +		return -EINVAL; +	} + +	/* Ensure AMD Northbridges don't decode reads to other devices */ +	if (unlikely(bus == 0 && devfn >= limit)) { +		*value = -1; +		return 0; +	} + +	rcu_read_lock(); +	addr = pci_dev_base(seg, bus, devfn); +	if (!addr) { +		rcu_read_unlock(); +		goto err; +	} + +	switch (len) { +	case 1: +		*value = mmio_config_readb(addr + reg); +		break; +	case 2: +		*value = mmio_config_readw(addr + reg); +		break; +	case 4: +		*value = mmio_config_readl(addr + reg); +		break; +	} +	rcu_read_unlock(); + +	return 0; +} + +static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus, +			   unsigned int devfn, int reg, int len, u32 value) +{ +	char __iomem *addr; + +	/* Why do we have this when nobody checks it. How about a BUG()!? 
-AK */ +	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) +		return -EINVAL; + +	/* Ensure AMD Northbridges don't decode writes to other devices */ +	if (unlikely(bus == 0 && devfn >= limit)) +		return 0; + +	rcu_read_lock(); +	addr = pci_dev_base(seg, bus, devfn); +	if (!addr) { +		rcu_read_unlock(); +		return -EINVAL; +	} + +	switch (len) { +	case 1: +		mmio_config_writeb(addr + reg, value); +		break; +	case 2: +		mmio_config_writew(addr + reg, value); +		break; +	case 4: +		mmio_config_writel(addr + reg, value); +		break; +	} +	rcu_read_unlock(); + +	return 0; +} + +const struct pci_raw_ops pci_mmcfg_numachip = { +	.read = pci_mmcfg_read_numachip, +	.write = pci_mmcfg_write_numachip, +}; + +int __init pci_numachip_init(void) +{ +	int ret = 0; +	u32 val; + +	/* For remote I/O, restrict bus 0 access to the actual number of AMD +	   Northbridges, which starts at device number 0x18 */ +	ret = raw_pci_read(0, 0, PCI_DEVFN(0x18, 0), 0x60, sizeof(val), &val); +	if (ret) +		goto out; + +	/* HyperTransport fabric size in bits 6:4 */ +	limit = PCI_DEVFN(0x18 + ((val >> 4) & 7) + 1, 0); + +	/* Use NumaChip PCI accessors for non-extended and extended access */ +	raw_pci_ops = raw_pci_ext_ops = &pci_mmcfg_numachip; +out: +	return ret; +} diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c deleted file mode 100644 index 5c9e2458df4..00000000000 --- a/arch/x86/pci/numaq_32.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * numaq_32.c - Low-level PCI access for NUMA-Q machines - */ - -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/nodemask.h> -#include <asm/apic.h> -#include <asm/mpspec.h> -#include <asm/pci_x86.h> -#include <asm/numaq.h> - -#define BUS2QUAD(global) (mp_bus_id_to_node[global]) - -#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) - -#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) - -#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ -	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) - 
-static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) -{ -	unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); -	if (xquad_portio) -		writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); -	else -		outl(val, 0xCF8); -} - -static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, -			     unsigned int devfn, int reg, int len, u32 *value) -{ -	unsigned long flags; -	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - -	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) -		return -EINVAL; - -	raw_spin_lock_irqsave(&pci_config_lock, flags); - -	write_cf8(bus, devfn, reg); - -	switch (len) { -	case 1: -		if (xquad_portio) -			*value = readb(adr + (reg & 3)); -		else -			*value = inb(0xCFC + (reg & 3)); -		break; -	case 2: -		if (xquad_portio) -			*value = readw(adr + (reg & 2)); -		else -			*value = inw(0xCFC + (reg & 2)); -		break; -	case 4: -		if (xquad_portio) -			*value = readl(adr); -		else -			*value = inl(0xCFC); -		break; -	} - -	raw_spin_unlock_irqrestore(&pci_config_lock, flags); - -	return 0; -} - -static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, -			      unsigned int devfn, int reg, int len, u32 value) -{ -	unsigned long flags; -	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - -	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))  -		return -EINVAL; - -	raw_spin_lock_irqsave(&pci_config_lock, flags); - -	write_cf8(bus, devfn, reg); - -	switch (len) { -	case 1: -		if (xquad_portio) -			writeb(value, adr + (reg & 3)); -		else -			outb((u8)value, 0xCFC + (reg & 3)); -		break; -	case 2: -		if (xquad_portio) -			writew(value, adr + (reg & 2)); -		else -			outw((u16)value, 0xCFC + (reg & 2)); -		break; -	case 4: -		if (xquad_portio) -			writel(value, adr + reg); -		else -			outl((u32)value, 0xCFC); -		break; -	} - -	raw_spin_unlock_irqrestore(&pci_config_lock, flags); - -	return 0; -} - -#undef PCI_CONF1_MQ_ADDRESS - -static struct pci_raw_ops pci_direct_conf1_mq = { -	.read	= 
pci_conf1_mq_read, -	.write	= pci_conf1_mq_write -}; - - -static void __devinit pci_fixup_i450nx(struct pci_dev *d) -{ -	/* -	 * i450NX -- Find and scan all secondary buses on all PXB's. -	 */ -	int pxb, reg; -	u8 busno, suba, subb; -	int quad = BUS2QUAD(d->bus->number); - -	dev_info(&d->dev, "searching for i450NX host bridges\n"); -	reg = 0xd0; -	for(pxb=0; pxb<2; pxb++) { -		pci_read_config_byte(d, reg++, &busno); -		pci_read_config_byte(d, reg++, &suba); -		pci_read_config_byte(d, reg++, &subb); -		dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", -			pxb, busno, suba, subb); -		if (busno) { -			/* Bus A */ -			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); -		} -		if (suba < subb) { -			/* Bus B */ -			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); -		} -	} -	pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); - -int __init pci_numaq_init(void) -{ -	int quad; - -	raw_pci_ops = &pci_direct_conf1_mq; - -	pci_root_bus = pcibios_scan_root(0); -	if (pci_root_bus) -		pci_bus_add_devices(pci_root_bus); -	if (num_online_nodes() > 1) -		for_each_online_node(quad) { -			if (quad == 0) -				continue; -			printk("Scanning PCI bus %d for quad %d\n",  -				QUADLOCAL2BUS(quad,0), quad); -			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); -		} -	return 0; -} diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index 13700ec8e2e..7043a4f0e98 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -206,6 +206,8 @@ static int pci_olpc_read(unsigned int seg, unsigned int bus,  {  	uint32_t *addr; +	WARN_ON(seg); +  	/* Use the hardware mechanism for non-simulated devices */  	if (!is_simulated(bus, devfn))  		return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); @@ -264,6 +266,8 @@ static int pci_olpc_read(unsigned int seg, unsigned int bus,  static int pci_olpc_write(unsigned int seg, unsigned int bus,  		unsigned int devfn, int reg, int len, uint32_t value)  { +	
WARN_ON(seg); +  	/* Use the hardware mechanism for non-simulated devices */  	if (!is_simulated(bus, devfn))  		return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); @@ -297,7 +301,7 @@ static int pci_olpc_write(unsigned int seg, unsigned int bus,  	return 0;  } -static struct pci_raw_ops pci_olpc_conf = { +static const struct pci_raw_ops pci_olpc_conf = {  	.read =	pci_olpc_read,  	.write = pci_olpc_write,  }; diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 2492d165096..c77b24a8b2d 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -9,6 +9,7 @@  #include <linux/uaccess.h>  #include <asm/pci_x86.h>  #include <asm/pci-functions.h> +#include <asm/cacheflush.h>  /* BIOS32 signature: "_32_" */  #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) @@ -25,6 +26,27 @@  #define PCIBIOS_HW_TYPE1_SPEC		0x10  #define PCIBIOS_HW_TYPE2_SPEC		0x20 +int pcibios_enabled; + +/* According to the BIOS specification at: + * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could + * restrict the x zone to some pages and make it ro. But this may be + * broken on some bios, complex to handle with static_protections. + * We could make the 0xe0000-0x100000 range rox, but this can break + * some ISA mapping. + * + * So we let's an rw and x hole when pcibios is used. This shouldn't + * happen for modern system with mmconfig, and if you don't want it + * you could disable pcibios... + */ +static inline void set_bios_x(void) +{ +	pcibios_enabled = 1; +	set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); +	if (__supported_pte_mask & _PAGE_NX) +		printk(KERN_INFO "PCI : PCI BIOS area is rw and x. 
Use pci=nobios if you want it NX.\n"); +} +  /*   * This is the standard structure used to identify the entry point   * to the BIOS32 Service Directory, as documented in @@ -102,7 +124,7 @@ static struct {  static int pci_bios_present; -static int __devinit check_pcibios(void) +static int check_pcibios(void)  {  	u32 signature, eax, ebx, ecx;  	u8 status, major_ver, minor_ver, hw_mech; @@ -159,6 +181,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,  	unsigned long flags;  	unsigned long bx = (bus << 8) | devfn; +	WARN_ON(seg);  	if (!value || (bus > 255) || (devfn > 255) || (reg > 255))  		return -EINVAL; @@ -225,6 +248,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,  	unsigned long flags;  	unsigned long bx = (bus << 8) | devfn; +	WARN_ON(seg);  	if ((bus > 255) || (devfn > 255) || (reg > 255))   		return -EINVAL; @@ -279,7 +303,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,   * Function table for BIOS32 access   */ -static struct pci_raw_ops pci_bios_access = { +static const struct pci_raw_ops pci_bios_access = {  	.read =		pci_bios_read,  	.write =	pci_bios_write  }; @@ -288,7 +312,7 @@ static struct pci_raw_ops pci_bios_access = {   * Try to find PCI BIOS.   
*/ -static struct pci_raw_ops * __devinit pci_find_bios(void) +static const struct pci_raw_ops *pci_find_bios(void)  {  	union bios32 *check;  	unsigned char sum; @@ -332,6 +356,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)  			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",  					bios32_entry);  			bios32_indirect.address = bios32_entry + PAGE_OFFSET; +			set_bios_x();  			if (check_pcibios())  				return &pci_bios_access;  		} diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c new file mode 100644 index 00000000000..5ceda85b868 --- /dev/null +++ b/arch/x86/pci/sta2x11-fixup.c @@ -0,0 +1,364 @@ +/* + * arch/x86/pci/sta2x11-fixup.c + * glue code for lib/swiotlb.c and DMA translation between STA2x11 + * AMBA memory mapping and the X86 memory mapping + * + * ST Microelectronics ConneXt (STA2X11/STA2X10) + * + * Copyright (c) 2010-2011 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/export.h> +#include <linux/list.h> + +#define STA2X11_SWIOTLB_SIZE (4*1024*1024) +extern int swiotlb_late_init_with_default_size(size_t default_size); + +/* + * We build a list of bus numbers that are under the ConneXt. The + * main bridge hosts 4 busses, which are the 4 endpoints, in order. 
+ */ +#define STA2X11_NR_EP		4	/* 0..3 included */ +#define STA2X11_NR_FUNCS	8	/* 0..7 included */ +#define STA2X11_AMBA_SIZE	(512 << 20) + +struct sta2x11_ahb_regs { /* saved during suspend */ +	u32 base, pexlbase, pexhbase, crw; +}; + +struct sta2x11_mapping { +	u32 amba_base; +	int is_suspended; +	struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS]; +}; + +struct sta2x11_instance { +	struct list_head list; +	int bus0; +	struct sta2x11_mapping map[STA2X11_NR_EP]; +}; + +static LIST_HEAD(sta2x11_instance_list); + +/* At probe time, record new instances of this bridge (likely one only) */ +static void sta2x11_new_instance(struct pci_dev *pdev) +{ +	struct sta2x11_instance *instance; + +	instance = kzalloc(sizeof(*instance), GFP_ATOMIC); +	if (!instance) +		return; +	/* This has a subordinate bridge, with 4 more-subordinate ones */ +	instance->bus0 = pdev->subordinate->number + 1; + +	if (list_empty(&sta2x11_instance_list)) { +		int size = STA2X11_SWIOTLB_SIZE; +		/* First instance: register your own swiotlb area */ +		dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); +		if (swiotlb_late_init_with_default_size(size)) +			dev_emerg(&pdev->dev, "init swiotlb failed\n"); +	} +	list_add(&instance->list, &sta2x11_instance_list); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, 0xcc17, sta2x11_new_instance); + +/* + * Utility functions used in this file from below + */ +static struct sta2x11_instance *sta2x11_pdev_to_instance(struct pci_dev *pdev) +{ +	struct sta2x11_instance *instance; +	int ep; + +	list_for_each_entry(instance, &sta2x11_instance_list, list) { +		ep = pdev->bus->number - instance->bus0; +		if (ep >= 0 && ep < STA2X11_NR_EP) +			return instance; +	} +	return NULL; +} + +static int sta2x11_pdev_to_ep(struct pci_dev *pdev) +{ +	struct sta2x11_instance *instance; + +	instance = sta2x11_pdev_to_instance(pdev); +	if (!instance) +		return -1; + +	return pdev->bus->number - instance->bus0; +} + +static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct 
pci_dev *pdev) +{ +	struct sta2x11_instance *instance; +	int ep; + +	instance = sta2x11_pdev_to_instance(pdev); +	if (!instance) +		return NULL; +	ep = sta2x11_pdev_to_ep(pdev); +	return instance->map + ep; +} + +/* This is exported, as some devices need to access the MFD registers */ +struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev) +{ +	return sta2x11_pdev_to_instance(pdev); +} +EXPORT_SYMBOL(sta2x11_get_instance); + + +/** + * p2a - Translate physical address to STA2x11 AMBA address, + *       used for DMA transfers to STA2x11 + * @p: Physical address + * @pdev: PCI device (must be hosted within the connext) + */ +static dma_addr_t p2a(dma_addr_t p, struct pci_dev *pdev) +{ +	struct sta2x11_mapping *map; +	dma_addr_t a; + +	map = sta2x11_pdev_to_mapping(pdev); +	a = p + map->amba_base; +	return a; +} + +/** + * a2p - Translate STA2x11 AMBA address to physical address + *       used for DMA transfers from STA2x11 + * @a: STA2x11 AMBA address + * @pdev: PCI device (must be hosted within the connext) + */ +static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev) +{ +	struct sta2x11_mapping *map; +	dma_addr_t p; + +	map = sta2x11_pdev_to_mapping(pdev); +	p = a - map->amba_base; +	return p; +} + +/** + * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers + *     returns virtual address. This is the only "special" function here. 
+ * @dev: PCI device + * @size: Size of the buffer + * @dma_handle: DMA address + * @flags: memory flags + */ +static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, +					    size_t size, +					    dma_addr_t *dma_handle, +					    gfp_t flags, +					    struct dma_attrs *attrs) +{ +	void *vaddr; + +	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); +	*dma_handle = p2a(*dma_handle, to_pci_dev(dev)); +	return vaddr; +} + +/* We have our own dma_ops: the same as swiotlb but from alloc (above) */ +static struct dma_map_ops sta2x11_dma_ops = { +	.alloc = sta2x11_swiotlb_alloc_coherent, +	.free = x86_swiotlb_free_coherent, +	.map_page = swiotlb_map_page, +	.unmap_page = swiotlb_unmap_page, +	.map_sg = swiotlb_map_sg_attrs, +	.unmap_sg = swiotlb_unmap_sg_attrs, +	.sync_single_for_cpu = swiotlb_sync_single_for_cpu, +	.sync_single_for_device = swiotlb_sync_single_for_device, +	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, +	.sync_sg_for_device = swiotlb_sync_sg_for_device, +	.mapping_error = swiotlb_dma_mapping_error, +	.dma_supported = NULL, /* FIXME: we should use this instead! 
*/ +}; + +/* At setup time, we use our own ops if the device is a ConneXt one */ +static void sta2x11_setup_pdev(struct pci_dev *pdev) +{ +	struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); + +	if (!instance) /* either a sta2x11 bridge or another ST device */ +		return; +	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); +	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); +	pdev->dev.archdata.dma_ops = &sta2x11_dma_ops; + +	/* We must enable all devices as master, for audio DMA to work */ +	pci_set_master(pdev); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev); + +/* + * The following three functions are exported (used in swiotlb: FIXME) + */ +/** + * dma_capable - Check if device can manage DMA transfers (FIXME: kill it) + * @dev: device for a PCI device + * @addr: DMA address + * @size: DMA size + */ +bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ +	struct sta2x11_mapping *map; + +	if (dev->archdata.dma_ops != &sta2x11_dma_ops) { +		if (!dev->dma_mask) +			return false; +		return addr + size - 1 <= *dev->dma_mask; +	} + +	map = sta2x11_pdev_to_mapping(to_pci_dev(dev)); + +	if (!map || (addr < map->amba_base)) +		return false; +	if (addr + size >= map->amba_base + STA2X11_AMBA_SIZE) { +		return false; +	} + +	return true; +} + +/** + * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device + * @dev: device for a PCI device + * @paddr: Physical address + */ +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ +	if (dev->archdata.dma_ops != &sta2x11_dma_ops) +		return paddr; +	return p2a(paddr, to_pci_dev(dev)); +} + +/** + * dma_to_phys - Return the physical address used for this STA2x11 DMA address + * @dev: device for a PCI device + * @daddr: STA2x11 AMBA DMA address + */ +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ +	if (dev->archdata.dma_ops != &sta2x11_dma_ops) +		return daddr; +	return a2p(daddr, to_pci_dev(dev)); +} + + +/* + * 
At boot we must set up the mappings for the pcie-to-amba bridge. + * It involves device access, and the same happens at suspend/resume time + */ + +#define AHB_MAPB		0xCA4 +#define AHB_CRW(i)		(AHB_MAPB + 0  + (i) * 0x10) +#define AHB_CRW_SZMASK			0xfffffc00UL +#define AHB_CRW_ENABLE			(1 << 0) +#define AHB_CRW_WTYPE_MEM		(2 << 1) +#define AHB_CRW_ROE			(1UL << 3)	/* Relax Order Ena */ +#define AHB_CRW_NSE			(1UL << 4)	/* No Snoop Enable */ +#define AHB_BASE(i)		(AHB_MAPB + 4  + (i) * 0x10) +#define AHB_PEXLBASE(i)		(AHB_MAPB + 8  + (i) * 0x10) +#define AHB_PEXHBASE(i)		(AHB_MAPB + 12 + (i) * 0x10) + +/* At probe time, enable mapping for each endpoint, using the pdev */ +static void sta2x11_map_ep(struct pci_dev *pdev) +{ +	struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); +	int i; + +	if (!map) +		return; +	pci_read_config_dword(pdev, AHB_BASE(0), &map->amba_base); + +	/* Configure AHB mapping */ +	pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0); +	pci_write_config_dword(pdev, AHB_PEXHBASE(0), 0); +	pci_write_config_dword(pdev, AHB_CRW(0), STA2X11_AMBA_SIZE | +			       AHB_CRW_WTYPE_MEM | AHB_CRW_ENABLE); + +	/* Disable all the other windows */ +	for (i = 1; i < STA2X11_NR_FUNCS; i++) +		pci_write_config_dword(pdev, AHB_CRW(i), 0); + +	dev_info(&pdev->dev, +		 "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n", +		 sta2x11_pdev_to_ep(pdev),  map->amba_base, +		 map->amba_base + STA2X11_AMBA_SIZE - 1); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep); + +#ifdef CONFIG_PM /* Some register values must be saved and restored */ + +static void suspend_mapping(struct pci_dev *pdev) +{ +	struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); +	int i; + +	if (!map) +		return; + +	if (map->is_suspended) +		return; +	map->is_suspended = 1; + +	/* Save all window configs */ +	for (i = 0; i < STA2X11_NR_FUNCS; i++) { +		struct sta2x11_ahb_regs *regs = map->regs + i; + +		pci_read_config_dword(pdev, AHB_BASE(i), &regs->base); +		
pci_read_config_dword(pdev, AHB_PEXLBASE(i), &regs->pexlbase); +		pci_read_config_dword(pdev, AHB_PEXHBASE(i), &regs->pexhbase); +		pci_read_config_dword(pdev, AHB_CRW(i), &regs->crw); +	} +} +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, suspend_mapping); + +static void resume_mapping(struct pci_dev *pdev) +{ +	struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); +	int i; + +	if (!map) +		return; + + +	if (!map->is_suspended) +		goto out; +	map->is_suspended = 0; + +	/* Restore all window configs */ +	for (i = 0; i < STA2X11_NR_FUNCS; i++) { +		struct sta2x11_ahb_regs *regs = map->regs + i; + +		pci_write_config_dword(pdev, AHB_BASE(i), regs->base); +		pci_write_config_dword(pdev, AHB_PEXLBASE(i), regs->pexlbase); +		pci_write_config_dword(pdev, AHB_PEXHBASE(i), regs->pexhbase); +		pci_write_config_dword(pdev, AHB_CRW(i), regs->crw); +	} +out: +	pci_set_master(pdev); /* Like at boot, enable master on all devices */ +} +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, resume_mapping); + +#endif /* CONFIG_PM */ diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c deleted file mode 100644 index 03008f72eb0..00000000000 --- a/arch/x86/pci/visws.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - *	Low-Level PCI Support for SGI Visual Workstation - * - *	(c) 1999--2000 Martin Mares <mj@ucw.cz> - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/init.h> - -#include <asm/setup.h> -#include <asm/pci_x86.h> -#include <asm/visws/cobalt.h> -#include <asm/visws/lithium.h> - -static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } -static void pci_visws_disable_irq(struct pci_dev *dev) { } - -/* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */ -/* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */ - -/* void __init pcibios_penalize_isa_irq(int irq, int active) {} */ - - -unsigned int pci_bus0, pci_bus1; - -static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin) 
-{ -	int irq, bus = dev->bus->number; - -	pin--; - -	/* Nothing useful at PIIX4 pin 1 */ -	if (bus == pci_bus0 && slot == 4 && pin == 0) -		return -1; - -	/* PIIX4 USB is on Bus 0, Slot 4, Line 3 */ -	if (bus == pci_bus0 && slot == 4 && pin == 3) { -		irq = CO_IRQ(CO_APIC_PIIX4_USB); -		goto out; -	} - -	/* First pin spread down 1 APIC entry per slot */ -	if (pin == 0) { -		irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 : -						CO_APIC_PCIA_BASE0) + slot); -		goto out; -	} - -	/* lines 1,2,3 from any slot is shared in this twirly pattern */ -	if (bus == pci_bus1) { -		/* lines 1-3 from devices 0 1 rotate over 2 apic entries */ -		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2)); -	} else { /* bus == pci_bus0 */ -		/* lines 1-3 from devices 0-3 rotate over 3 apic entries */ -		if (slot == 0) -			slot = 3; /* same pattern */ -		irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3)); -	} -out: -	printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq); -	return irq; -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ -	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -int __init pci_visws_init(void) -{ -	pcibios_enable_irq = &pci_visws_enable_irq; -	pcibios_disable_irq = &pci_visws_disable_irq; - -	/* The VISWS supports configuration access type 1 only */ -	pci_probe = (pci_probe | PCI_PROBE_CONF1) & -		    ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); - -	pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff; -	pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff; - -	printk(KERN_INFO "PCI: Lithium bridge A bus: %u, " -		"bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); - -	raw_pci_ops = &pci_direct_conf1; -	pci_scan_bus_with_sysdata(pci_bus0); -	pci_scan_bus_with_sysdata(pci_bus1); -	pci_fixup_irqs(pci_common_swizzle, visws_map_irq); -	pcibios_resource_survey(); -	/* Request bus scan */ -	return 1; -} diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9..905956f1646 100644 --- 
a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -1,8 +1,13 @@  /* - * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux - *			   x86 PCI core to support the Xen PCI Frontend + * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and + * initial domain support. We also handle the DSDT _PRT callbacks for GSI's + * used in HVM and initial domain mode (PV does not parse ACPI, so it has no + * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and + * 0xcf8 PCI configuration read/write.   *   *   Author: Ryan Wilson <hap9@epoch.ncsc.mil> + *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> + *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>   */  #include <linux/module.h>  #include <linux/init.h> @@ -19,21 +24,57 @@  #include <xen/events.h>  #include <asm/xen/pci.h> +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ +	int rc; +	int share = 1; +	int pirq; +	u8 gsi; + +	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); +	if (rc < 0) { +		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", +			 rc); +		return rc; +	} +	/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ +	pirq = gsi; + +	if (gsi < NR_IRQS_LEGACY) +		share = 0; + +	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); +	if (rc < 0) { +		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", +			 gsi, pirq, rc); +		return rc; +	} + +	dev->irq = rc; +	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); +	return 0; +} +  #ifdef CONFIG_ACPI -static int xen_hvm_register_pirq(u32 gsi, int triggering) +static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, +			     bool set_pirq)  { -	int rc, irq; +	int rc, pirq = -1, irq = -1;  	struct physdev_map_pirq map_irq;  	int shareable = 0;  	char *name; -	if (!xen_hvm_domain()) -		return -1; +	irq = xen_irq_from_gsi(gsi); +	if (irq > 0) +		return irq; + +	if (set_pirq) +		pirq = 
gsi;  	map_irq.domid = DOMID_SELF;  	map_irq.type = MAP_PIRQ_TYPE_GSI;  	map_irq.index = gsi; -	map_irq.pirq = -1; +	map_irq.pirq = pirq;  	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);  	if (rc) { @@ -49,19 +90,64 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)  		name = "ioapic-level";  	} -	irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); +	if (gsi_override >= 0) +		gsi = gsi_override; -	printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); +	irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); +	if (irq < 0) +		goto out; +	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi); +out:  	return irq;  }  static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, +				     int trigger, int polarity) +{ +	if (!xen_hvm_domain()) +		return -1; + +	return xen_register_pirq(gsi, -1 /* no GSI override */, trigger, +				 false /* no mapping of GSI to PIRQ */); +} + +#ifdef CONFIG_XEN_DOM0 +static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) +{ +	int rc, irq; +	struct physdev_setup_gsi setup_gsi; + +	if (!xen_pv_domain()) +		return -1; + +	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", +			gsi, triggering, polarity); + +	irq = xen_register_pirq(gsi, gsi_override, triggering, true); + +	setup_gsi.gsi = gsi; +	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); +	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); + +	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); +	if (rc == -EEXIST) +		printk(KERN_INFO "Already setup the GSI :%d\n", gsi); +	else if (rc) { +		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", +				gsi, rc); +	} + +	return irq; +} + +static int acpi_register_gsi_xen(struct device *dev, u32 gsi,  				 int trigger, int polarity)  { -	return xen_hvm_register_pirq(gsi, trigger); +	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);  }  #endif +#endif  #if defined(CONFIG_PCI_MSI)  #include <linux/msi.h> @@ -70,6 +156,52 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,  struct xen_pci_frontend_ops *xen_pci_frontend;  EXPORT_SYMBOL_GPL(xen_pci_frontend); +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ +	int irq, ret, i; +	struct msi_desc *msidesc; +	int *v; + +	if (type == PCI_CAP_ID_MSI && nvec > 1) +		return 1; + +	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); +	if (!v) +		return -ENOMEM; + +	if (type == PCI_CAP_ID_MSIX) +		ret = xen_pci_frontend_enable_msix(dev, v, nvec); +	else +		ret = xen_pci_frontend_enable_msi(dev, v); +	if (ret) +		goto error; +	i = 0; +	list_for_each_entry(msidesc, &dev->msi_list, list) { +		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], +					       (type == PCI_CAP_ID_MSI) ? nvec : 1, +					       (type == PCI_CAP_ID_MSIX) ? 
+					       "pcifront-msi-x" : +					       "pcifront-msi", +						DOMID_SELF); +		if (irq < 0) { +			ret = irq; +			goto free; +		} +		i++; +	} +	kfree(v); +	return 0; + +error: +	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); +free: +	kfree(v); +	return ret; +} + +#define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \ +		MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) +  static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,  		struct msi_msg *msg)  { @@ -83,94 +215,166 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,  		MSI_ADDR_REDIRECTION_CPU |  		MSI_ADDR_DEST_ID(pirq); -	msg->data = -		MSI_DATA_TRIGGER_EDGE | -		MSI_DATA_LEVEL_ASSERT | -		/* delivery mode reserved */ -		(3 << 8) | -		MSI_DATA_VECTOR(0); +	msg->data = XEN_PIRQ_MSI_DATA;  }  static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)  { -	int irq, pirq, ret = 0; +	int irq, pirq;  	struct msi_desc *msidesc;  	struct msi_msg msg; +	if (type == PCI_CAP_ID_MSI && nvec > 1) +		return 1; +  	list_for_each_entry(msidesc, &dev->msi_list, list) { -		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? -				"msi-x" : "msi", &irq, &pirq); -		if (irq < 0 || pirq < 0) +		__read_msi_msg(msidesc, &msg); +		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | +			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); +		if (msg.data != XEN_PIRQ_MSI_DATA || +		    xen_irq_from_pirq(pirq) < 0) { +			pirq = xen_allocate_pirq_msi(dev, msidesc); +			if (pirq < 0) { +				irq = -ENODEV; +				goto error; +			} +			xen_msi_compose_msg(dev, pirq, &msg); +			__write_msi_msg(msidesc, &msg); +			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); +		} else { +			dev_dbg(&dev->dev, +				"xen: msi already bound to pirq=%d\n", pirq); +		} +		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, +					       (type == PCI_CAP_ID_MSI) ? nvec : 1, +					       (type == PCI_CAP_ID_MSIX) ? 
+					       "msi-x" : "msi", +					       DOMID_SELF); +		if (irq < 0)  			goto error; -		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); -		xen_msi_compose_msg(dev, pirq, &msg); -		ret = set_irq_msi(irq, msidesc); -		if (ret < 0) -			goto error_while; -		write_msi_msg(irq, &msg); +		dev_dbg(&dev->dev, +			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);  	}  	return 0; -error_while: -	unbind_from_irqhandler(irq, NULL);  error: -	if (ret == -ENODEV) -		dev_err(&dev->dev, "Xen PCI frontend has not registered" \ -				" MSI/MSI-X support!\n"); - -	return ret; +	dev_err(&dev->dev, +		"Xen PCI frontend has not registered MSI/MSI-X support!\n"); +	return irq;  } -/* - * For MSI interrupts we have to use drivers/xen/event.s functions to - * allocate an irq_desc and setup the right */ - +#ifdef CONFIG_XEN_DOM0 +static bool __read_mostly pci_seg_supported = true; -static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)  { -	int irq, ret, i; +	int ret = 0;  	struct msi_desc *msidesc; -	int *v; -	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); -	if (!v) -		return -ENOMEM; - -	if (type == PCI_CAP_ID_MSIX) -		ret = xen_pci_frontend_enable_msix(dev, &v, nvec); -	else -		ret = xen_pci_frontend_enable_msi(dev, &v); -	if (ret) -		goto error; -	i = 0;  	list_for_each_entry(msidesc, &dev->msi_list, list) { -		irq = xen_allocate_pirq(v[i], 0, /* not sharable */ -			(type == PCI_CAP_ID_MSIX) ? -			"pcifront-msi-x" : "pcifront-msi"); -		if (irq < 0) { -			ret = -1; -			goto free; +		struct physdev_map_pirq map_irq; +		domid_t domid; + +		domid = ret = xen_find_device_domain_owner(dev); +		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, +		 * hence check ret value for < 0. 
*/ +		if (ret < 0) +			domid = DOMID_SELF; + +		memset(&map_irq, 0, sizeof(map_irq)); +		map_irq.domid = domid; +		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; +		map_irq.index = -1; +		map_irq.pirq = -1; +		map_irq.bus = dev->bus->number | +			      (pci_domain_nr(dev->bus) << 16); +		map_irq.devfn = dev->devfn; + +		if (type == PCI_CAP_ID_MSI && nvec > 1) { +			map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI; +			map_irq.entry_nr = nvec; +		} else if (type == PCI_CAP_ID_MSIX) { +			int pos; +			u32 table_offset, bir; + +			pos = dev->msix_cap; +			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, +					      &table_offset); +			bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); + +			map_irq.table_base = pci_resource_start(dev, bir); +			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;  		} -		ret = set_irq_msi(irq, msidesc); -		if (ret) -			goto error_while; -		i++; -	} -	kfree(v); -	return 0; +		ret = -EINVAL; +		if (pci_seg_supported) +			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, +						    &map_irq); +		if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) { +			/* +			 * If MAP_PIRQ_TYPE_MULTI_MSI is not available +			 * there's nothing else we can do in this case. +			 * Just set ret > 0 so driver can retry with +			 * single MSI. 
+			 */ +			ret = 1; +			goto out; +		} +		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { +			map_irq.type = MAP_PIRQ_TYPE_MSI; +			map_irq.index = -1; +			map_irq.pirq = -1; +			map_irq.bus = dev->bus->number; +			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, +						    &map_irq); +			if (ret != -EINVAL) +				pci_seg_supported = false; +		} +		if (ret) { +			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", +				 ret, domid); +			goto out; +		} -error_while: -	unbind_from_irqhandler(irq, NULL); -error: -	if (ret == -ENODEV) -		dev_err(&dev->dev, "Xen PCI frontend has not registered" \ -			" MSI/MSI-X support!\n"); -free: -	kfree(v); +		ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, +		                               (type == PCI_CAP_ID_MSI) ? nvec : 1, +		                               (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi", +		                               domid); +		if (ret < 0) +			goto out; +	} +	ret = 0; +out:  	return ret;  } +static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) +{ +	int ret = 0; + +	if (pci_seg_supported) { +		struct physdev_pci_device restore_ext; + +		restore_ext.seg = pci_domain_nr(dev->bus); +		restore_ext.bus = dev->bus->number; +		restore_ext.devfn = dev->devfn; +		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext, +					&restore_ext); +		if (ret == -ENOSYS) +			pci_seg_supported = false; +		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret); +	} +	if (!pci_seg_supported) { +		struct physdev_restore_msi restore; + +		restore.bus = dev->bus->number; +		restore.devfn = dev->devfn; +		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore); +		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret); +	} +} +#endif +  static void xen_teardown_msi_irqs(struct pci_dev *dev)  {  	struct msi_desc *msidesc; @@ -180,56 +384,24 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)  		xen_pci_frontend_disable_msix(dev);  	else  		xen_pci_frontend_disable_msi(dev); + +	/* Free the 
IRQ's and the msidesc using the generic code. */ +	default_teardown_msi_irqs(dev);  }  static void xen_teardown_msi_irq(unsigned int irq)  {  	xen_destroy_irq(irq);  } - -static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static u32 xen_nop_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)  { -	int irq, ret; -	struct msi_desc *msidesc; - -	list_for_each_entry(msidesc, &dev->msi_list, list) { -		irq = xen_create_msi_irq(dev, msidesc, type); -		if (irq < 0) -			return -1; - -		ret = set_irq_msi(irq, msidesc); -		if (ret) -			goto error; -	}  	return 0; - -error: -	xen_destroy_irq(irq); -	return ret;  } -#endif - -static int xen_pcifront_enable_irq(struct pci_dev *dev) +static u32 xen_nop_msix_mask_irq(struct msi_desc *desc, u32 flag)  { -	int rc; -	int share = 1; - -	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); - -	if (dev->irq < 0) -		return -EINVAL; - -	if (dev->irq < NR_IRQS_LEGACY) -		share = 0; - -	rc = xen_allocate_pirq(dev->irq, share, "pcifront"); -	if (rc < 0) { -		dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", -			 dev->irq, rc); -		return rc; -	}  	return 0;  } +#endif  int __init pci_xen_init(void)  { @@ -252,13 +424,15 @@ int __init pci_xen_init(void)  	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;  	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;  	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs; +	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq; +	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;  #endif  	return 0;  }  int __init pci_xen_hvm_init(void)  { -	if (!xen_feature(XENFEAT_hvm_pirqs)) +	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))  		return 0;  #ifdef CONFIG_ACPI @@ -277,79 +451,13 @@ int __init pci_xen_hvm_init(void)  }  #ifdef CONFIG_XEN_DOM0 -static int xen_register_pirq(u32 gsi, int triggering) -{ -	int rc, irq; -	struct physdev_map_pirq map_irq; -	int shareable = 0; -	char *name; - -	if (!xen_pv_domain()) -		return -1; - -	if (triggering == 
ACPI_EDGE_SENSITIVE) { -		shareable = 0; -		name = "ioapic-edge"; -	} else { -		shareable = 1; -		name = "ioapic-level"; -	} - -	irq = xen_allocate_pirq(gsi, shareable, name); - -	printk(KERN_DEBUG "xen: --> irq=%d\n", irq); - -	if (irq < 0) -		goto out; - -	map_irq.domid = DOMID_SELF; -	map_irq.type = MAP_PIRQ_TYPE_GSI; -	map_irq.index = gsi; -	map_irq.pirq = irq; - -	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); -	if (rc) { -		printk(KERN_WARNING "xen map irq failed %d\n", rc); -		return -1; -	} - -out: -	return irq; -} - -static int xen_register_gsi(u32 gsi, int triggering, int polarity) -{ -	int rc, irq; -	struct physdev_setup_gsi setup_gsi; - -	if (!xen_pv_domain()) -		return -1; - -	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", -			gsi, triggering, polarity); - -	irq = xen_register_pirq(gsi, triggering); - -	setup_gsi.gsi = gsi; -	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); -	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - -	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); -	if (rc == -EEXIST) -		printk(KERN_INFO "Already setup the GSI :%d\n", gsi); -	else if (rc) { -		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", -				gsi, rc); -	} - -	return irq; -} -  static __init void xen_setup_acpi_sci(void)  {  	int rc;  	int trigger, polarity;  	int gsi = acpi_sci_override_gsi; +	int irq = -1; +	int gsi_override = -1;  	if (!gsi)  		return; @@ -362,55 +470,134 @@ static __init void xen_setup_acpi_sci(void)  	}  	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;  	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; -	 +  	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "  			"polarity=%d\n", gsi, trigger, polarity); -	gsi = xen_register_gsi(gsi, trigger, polarity); +	/* Before we bind the GSI to a Linux IRQ, check whether +	 * we need to override it with bus_irq (IRQ) value. 
Usually for +	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: +	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) +	 * but there are oddballs where the IRQ != GSI: +	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) +	 * which ends up being: gsi_to_irq[9] == 20 +	 * (which is what acpi_gsi_to_irq ends up calling when starting the +	 * the ACPI interpreter and keels over since IRQ 9 has not been +	 * setup as we had setup IRQ 20 for it). +	 */ +	if (acpi_gsi_to_irq(gsi, &irq) == 0) { +		/* Use the provided value if it's valid. */ +		if (irq >= 0) +			gsi_override = irq; +	} + +	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);  	printk(KERN_INFO "xen: acpi sci %d\n", gsi);  	return;  } -static int acpi_register_gsi_xen(struct device *dev, u32 gsi, -				 int trigger, int polarity) +int __init pci_xen_initial_domain(void)  { -	return xen_register_gsi(gsi, trigger, polarity); -} +	int irq; -static int __init pci_xen_initial_domain(void) -{  #ifdef CONFIG_PCI_MSI  	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;  	x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; +	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq; +	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;  #endif  	xen_setup_acpi_sci();  	__acpi_register_gsi = acpi_register_gsi_xen; +	/* Pre-allocate legacy irqs */ +	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { +		int trigger, polarity; +		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) +			continue; + +		xen_register_pirq(irq, -1 /* no GSI override */, +			trigger ? 
ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE, +			true /* Map GSI to PIRQ */); +	} +	if (0 == nr_ioapics) { +		for (irq = 0; irq < NR_IRQS_LEGACY; irq++) +			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic"); +	}  	return 0;  } -void __init xen_setup_pirqs(void) -{ -	int irq; +struct xen_device_domain_owner { +	domid_t domain; +	struct pci_dev *dev; +	struct list_head list; +}; -	pci_xen_initial_domain(); +static DEFINE_SPINLOCK(dev_domain_list_spinlock); +static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); -	if (0 == nr_ioapics) { -		for (irq = 0; irq < NR_IRQS_LEGACY; irq++) -			xen_allocate_pirq(irq, 0, "xt-pic"); -		return; +static struct xen_device_domain_owner *find_device(struct pci_dev *dev) +{ +	struct xen_device_domain_owner *owner; + +	list_for_each_entry(owner, &dev_domain_list, list) { +		if (owner->dev == dev) +			return owner;  	} +	return NULL; +} -	/* Pre-allocate legacy irqs */ -	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { -		int trigger, polarity; +int xen_find_device_domain_owner(struct pci_dev *dev) +{ +	struct xen_device_domain_owner *owner; +	int domain = -ENODEV; + +	spin_lock(&dev_domain_list_spinlock); +	owner = find_device(dev); +	if (owner) +		domain = owner->domain; +	spin_unlock(&dev_domain_list_spinlock); +	return domain; +} +EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); -		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) -			continue; +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) +{ +	struct xen_device_domain_owner *owner; + +	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); +	if (!owner) +		return -ENODEV; -		xen_register_pirq(irq, -			trigger ? 
ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); +	spin_lock(&dev_domain_list_spinlock); +	if (find_device(dev)) { +		spin_unlock(&dev_domain_list_spinlock); +		kfree(owner); +		return -EEXIST;  	} +	owner->domain = domain; +	owner->dev = dev; +	list_add_tail(&owner->list, &dev_domain_list); +	spin_unlock(&dev_domain_list_spinlock); +	return 0; +} +EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); + +int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ +	struct xen_device_domain_owner *owner; + +	spin_lock(&dev_domain_list_spinlock); +	owner = find_device(dev); +	if (!owner) { +		spin_unlock(&dev_domain_list_spinlock); +		return -ENODEV; +	} +	list_del(&owner->list); +	spin_unlock(&dev_domain_list_spinlock); +	kfree(owner); +	return 0;  } +EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);  #endif  | 
