From 937383a58e47154d3098783df739e8fa8984a434 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Oct 2011 22:28:05 +0100 Subject: PCI: Add Solarflare vendor ID and SFC4000 device IDs These will be shared between the sfc driver and a PCI quirk. Signed-off-by: Ben Hutchings Signed-off-by: Jesse Barnes --- drivers/net/sfc/efx.c | 10 ++++++---- drivers/net/sfc/efx.h | 4 ---- drivers/net/sfc/falcon.c | 3 ++- drivers/net/sfc/falcon_boards.c | 3 ++- include/linux/pci_ids.h | 5 +++++ 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index b59abc706d9..f8b9be31c82 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -2197,13 +2197,15 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) /* PCI device ID table */ static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = { - {PCI_DEVICE(EFX_VENDID_SFC, FALCON_A_P_DEVID), + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0), .driver_data = (unsigned long) &falcon_a1_nic_type}, - {PCI_DEVICE(EFX_VENDID_SFC, FALCON_B_P_DEVID), + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000B), .driver_data = (unsigned long) &falcon_b0_nic_type}, - {PCI_DEVICE(EFX_VENDID_SFC, BETHPAGE_A_P_DEVID), + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, BETHPAGE_A_P_DEVID), .driver_data = (unsigned long) &siena_a0_nic_type}, - {PCI_DEVICE(EFX_VENDID_SFC, SIENA_A_P_DEVID), + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, SIENA_A_P_DEVID), .driver_data = (unsigned long) &siena_a0_nic_type}, {0} /* end of list */ }; diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h index b0d1209ea18..c7e75234c74 100644 --- a/drivers/net/sfc/efx.h +++ b/drivers/net/sfc/efx.h @@ -15,10 +15,6 @@ #include "filter.h" /* PCI IDs */ -#define EFX_VENDID_SFC 0x1924 -#define FALCON_A_P_DEVID 0x0703 -#define FALCON_A_S_DEVID 0x6703 -#define FALCON_B_P_DEVID 0x0710 #define BETHPAGE_A_P_DEVID 0x0803 #define SIENA_A_P_DEVID 0x0813 diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c index 94bf4aaf984..9334b598020 100644 --- a/drivers/net/sfc/falcon.c +++ b/drivers/net/sfc/falcon.c @@ -1424,7 +1424,8 @@ static int falcon_probe_nic(struct efx_nic *efx) } dev = pci_dev_get(efx->pci_dev); - while ((dev = pci_get_device(EFX_VENDID_SFC, FALCON_A_S_DEVID, + while ((dev = pci_get_device(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, dev))) { if (dev->bus == efx->pci_dev->bus && dev->devfn == efx->pci_dev->devfn + 1) { diff --git a/drivers/net/sfc/falcon_boards.c b/drivers/net/sfc/falcon_boards.c index b9cc846811d..6cc16b8cc6f 100644 --- a/drivers/net/sfc/falcon_boards.c +++ b/drivers/net/sfc/falcon_boards.c @@ -764,7 +764,8 @@ int falcon_probe_board(struct efx_nic *efx, u16 revision_info) if (board->type) { netif_info(efx, probe, efx->net_dev, "board is %s rev %c%d\n", - (efx->pci_dev->subsystem_vendor == EFX_VENDID_SFC) + (efx->pci_dev->subsystem_vendor == + PCI_VENDOR_ID_SOLARFLARE) ? board->type->ref_model : board->type->gen_type, 'A' + board->major, board->minor); return 0; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ae96bbe5451..1679ff6931f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2302,6 +2302,11 @@ #define PCI_DEVICE_ID_RENESAS_SH7785 0x0007 #define PCI_DEVICE_ID_RENESAS_SH7786 0x0010 +#define PCI_VENDOR_ID_SOLARFLARE 0x1924 +#define PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0 0x0703 +#define PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1 0x6703 +#define PCI_DEVICE_ID_SOLARFLARE_SFC4000B 0x0710 + #define PCI_VENDOR_ID_TDI 0x192E #define PCI_DEVICE_ID_TDI_EHCI 0x0101 -- cgit v1.2.3-18-g5258 From a94d072b20239f6f615dc20f0a54f63e9b642f9e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Oct 2011 22:35:03 +0100 Subject: PCI: Add quirk for known incorrect MPSS Using legacy interrupts and TLPs > 256 bytes on the SFC4000 (all revisions) may cause interrupt messages to be replayed. In some systems this results in a non-recoverable MCE. Early boards using the SFC4000 set the maximum payload size supported (MPSS) to 1024 bytes and we should override that. There are probably other devices with similar issues, so give this quirk a generic name. Signed-off-by: Ben Hutchings Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1196f61a4ab..f70c36f9b1f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2822,6 +2822,20 @@ static void __devinit fixup_ti816x_class(struct pci_dev* dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class); +/* Some PCIe devices do not work reliably with the claimed maximum + * payload size supported. + */ +static void __devinit fixup_mpss_256(struct pci_dev *dev) +{ + dev->pcie_mpss = 1; /* 256 bytes */ +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { -- cgit v1.2.3-18-g5258 From 72da0b07b1b497927758a2102b856ce41e4ba81e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 15 Sep 2011 08:58:51 +0100 Subject: x86: constify PCI raw ops structures As with any other such change, the goal is to prevent inadvertent writes to these structures (assuming DEBUG_RODATA is enabled), and to separate data (possibly frequently) written to from such never getting modified. Reviewed-by: Ingo Molnar Signed-off-by: Jan Beulich Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 6 +++--- arch/x86/pci/ce4100.c | 2 +- arch/x86/pci/common.c | 4 ++-- arch/x86/pci/direct.c | 6 +++--- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 2 +- arch/x86/pci/numaq_32.c | 2 +- arch/x86/pci/olpc.c | 2 +- arch/x86/pci/pcbios.c | 4 ++-- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 704526734be..e3819780685 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -99,10 +99,10 @@ struct pci_raw_ops { int reg, int len, u32 val); }; -extern struct pci_raw_ops *raw_pci_ops; -extern struct pci_raw_ops *raw_pci_ext_ops; +extern const struct pci_raw_ops *raw_pci_ops; +extern const struct pci_raw_ops *raw_pci_ext_ops; -extern struct pci_raw_ops pci_direct_conf1; +extern const struct pci_raw_ops pci_direct_conf1; extern bool port_cf9_safe; /* arch_initcall level */ diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 99176094500..41bd2a2d2c5 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -304,7 +304,7 @@ static int ce4100_conf_write(unsigned int seg, unsigned int bus, return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); } -struct pci_raw_ops ce4100_pci_conf = { +static const struct pci_raw_ops ce4100_pci_conf = { .read = ce4100_conf_read, .write = ce4100_conf_write, }; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 92df322e0b5..7962ccb4d9b 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -33,8 +33,8 @@ int noioapicreroute = 1; int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; -struct pci_raw_ops *raw_pci_ops; -struct pci_raw_ops *raw_pci_ext_ops; +const struct pci_raw_ops *__read_mostly raw_pci_ops; +const struct pci_raw_ops *__read_mostly raw_pci_ext_ops; int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 4f2c70439d7..15460590b8c 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -79,7 +79,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, #undef PCI_CONF1_ADDRESS -struct pci_raw_ops pci_direct_conf1 = { +const struct pci_raw_ops pci_direct_conf1 = { .read = pci_conf1_read, .write = pci_conf1_write, }; @@ -175,7 +175,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, #undef PCI_CONF2_ADDRESS -struct pci_raw_ops pci_direct_conf2 = { +static const struct pci_raw_ops pci_direct_conf2 = { .read = pci_conf2_read, .write = pci_conf2_write, }; @@ -191,7 +191,7 @@ struct pci_raw_ops pci_direct_conf2 = { * This should be close to trivial, but it isn't, because there are buggy * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. */ -static int __init pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(const struct pci_raw_ops *o) { u32 x = 0; int year, devfn; diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index a3d9c54792a..5372e86834c 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -117,7 +117,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_mmcfg = { +static const struct pci_raw_ops pci_mmcfg = { .read = pci_mmcfg_read, .write = pci_mmcfg_write, }; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index e783841bd1d..915a493502c 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -81,7 +81,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_mmcfg = { +static const struct pci_raw_ops pci_mmcfg = { .read = pci_mmcfg_read, .write = pci_mmcfg_write, }; diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 512a88c4150..51abf02f922 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -110,7 +110,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, #undef PCI_CONF1_MQ_ADDRESS -static struct pci_raw_ops pci_direct_conf1_mq = { +static const struct pci_raw_ops pci_direct_conf1_mq = { .read = pci_conf1_mq_read, .write = pci_conf1_mq_write }; diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index 5262603b04d..7043a4f0e98 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -301,7 +301,7 @@ static int pci_olpc_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_olpc_conf = { +static const struct pci_raw_ops pci_olpc_conf = { .read = pci_olpc_read, .write = pci_olpc_write, }; diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index f6855355146..db0e9a51e61 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -303,7 +303,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, * Function table for BIOS32 access */ -static struct pci_raw_ops pci_bios_access = { +static const struct pci_raw_ops pci_bios_access = { .read = pci_bios_read, .write = pci_bios_write }; @@ -312,7 +312,7 @@ static struct pci_raw_ops pci_bios_access = { * Try to find PCI BIOS. */ -static struct pci_raw_ops * __devinit pci_find_bios(void) +static const struct pci_raw_ops * __devinit pci_find_bios(void) { union bios32 *check; unsigned char sum; -- cgit v1.2.3-18-g5258 From e24442733ee486c99d03fe2ecd98924d1bc14c51 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 11 Sep 2011 14:08:38 -0300 Subject: PCI: Make pci_setup_bridge() non-static for use by arch code The "powernv" platform of the powerpc architecture needs to assign PCI resources using a specific algorithm to fit some HW constraints of the IBM "IODA" architecture (related to the ability to create error handling domains that encompass specific segments of MMIO space). For doing so, it wants to call pci_setup_bridge() from architecture specific resource management in order to configure bridges after all resources have been assigned. So make it non-static. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 2 +- include/linux/pci.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 784da9d3602..86b69f85f90 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -426,7 +426,7 @@ static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type) pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } -static void pci_setup_bridge(struct pci_bus *bus) +void pci_setup_bridge(struct pci_bus *bus) { unsigned long type = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; diff --git a/include/linux/pci.h b/include/linux/pci.h index 9fc01226055..4ff6d4e9455 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -955,6 +955,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); +void pci_setup_bridge(struct pci_bus *bus); #define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0) #define PCI_VGA_STATE_CHANGE_DECODES (1 << 1) -- cgit v1.2.3-18-g5258 From 3e309cdf07c930f29a4e0f233e47d399bea34c68 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 5 Oct 2011 11:44:50 -0400 Subject: PCI quirk: mmc: Always check for lower base frequency quirk for Ricoh 1180:e823 Commit 15bed0f2f added a quirk for the e823 Ricoh card reader to lower the base frequency. However, the quirk first checks to see if the proprietary MMC controller is disabled, and returns if so. On some devices, such as the Lenovo X220, the MMC controller is already disabled by firmware it seems, but the frequency change is still needed so sdhci-pci can talk to the cards. Since the MMC controller is disabled, the frequency fixup was never being run on these machines. This moves the e823 check above the MMC controller check so that it always gets run. This fixes https://bugzilla.redhat.com/show_bug.cgi?id=722509 Signed-off-by: Josh Boyer Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f70c36f9b1f..e5aadf357ae 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2745,20 +2745,6 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) /* disable must be done via function #0 */ if (PCI_FUNC(dev->devfn)) return; - - pci_read_config_byte(dev, 0xCB, &disable); - - if (disable & 0x02) - return; - - pci_read_config_byte(dev, 0xCA, &write_enable); - pci_write_config_byte(dev, 0xCA, 0x57); - pci_write_config_byte(dev, 0xCB, disable | 0x02); - pci_write_config_byte(dev, 0xCA, write_enable); - - dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n"); - dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n"); - /* * RICOH 0xe823 SD/MMC card reader fails to recognize * certain types of SD/MMC cards. Lowering the SD base @@ -2781,6 +2767,20 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) dev_notice(&dev->dev, "MMC controller base frequency changed to 50Mhz.\n"); } + + pci_read_config_byte(dev, 0xCB, &disable); + + if (disable & 0x02) + return; + + pci_read_config_byte(dev, 0xCA, &write_enable); + pci_write_config_byte(dev, 0xCA, 0x57); + pci_write_config_byte(dev, 0xCB, disable | 0x02); + pci_write_config_byte(dev, 0xCA, write_enable); + + dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n"); + dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n"); + } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); -- cgit v1.2.3-18-g5258 From 379021d5c0899fcf9410cae4ca7a59a5a94ca769 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Oct 2011 23:16:33 +0200 Subject: PCI / PM: Extend PME polling to all PCI devices The land of PCI power management is a land of sorrow and ugliness, especially in the area of signaling events by devices. There are devices that set their PME Status bits, but don't really bother to send a PME message or assert PME#. There are hardware vendors who don't connect PME# lines to the system core logic (they know who they are). There are PCI Express Root Ports that don't bother to trigger interrupts when they receive PME messages from the devices below. There are ACPI BIOSes that forget to provide _PRW methods for devices capable of signaling wakeup. Finally, there are BIOSes that do provide _PRW methods for such devices, but then don't bother to call Notify() for those devices from the corresponding _Lxx/_Exx GPE-handling methods. In all of these cases the kernel doesn't have a chance to receive a proper notification that it should wake up a device, so devices stay in low-power states forever. Worse yet, in some cases they continuously send PME Messages that are silently ignored, because the kernel simply doesn't know that it should clear the device's PME Status bit. This problem was first observed for "parallel" (non-Express) PCI devices on add-on cards and Matthew Garrett addressed it by adding code that polls PME Status bits of such devices, if they are enabled to signal PME, to the kernel. Recently, however, it has turned out that PCI Express devices are also affected by this issue and that it is not limited to add-on devices, so it seems necessary to extend the PME polling to all PCI devices, including PCI Express and planar ones. Still, it would be wasteful to poll the PME Status bits of devices that are known to receive proper PME notifications, so make the kernel (1) poll the PME Status bits of all PCI and PCIe devices enabled to signal PME and (2) disable the PME Status polling for devices for which correct PME notifications are received. Tested-by: Sarah Sharp Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 3 +++ drivers/pci/pci.c | 41 ++++++++++++++++++++--------------------- drivers/pci/pcie/pme.c | 9 +++++++++ include/linux/pci.h | 1 + 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index d36f41ea8cb..cd3c4f1cdf1 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -46,6 +46,9 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) struct pci_dev *pci_dev = context; if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { + if (pci_dev->pme_poll) + pci_dev->pme_poll = false; + pci_wakeup_event(pci_dev); pci_check_pme_status(pci_dev); pm_runtime_resume(&pci_dev->dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9651f0a881..7cd417e9405 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1407,13 +1407,16 @@ bool pci_check_pme_status(struct pci_dev *dev) /** * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set. * @dev: Device to handle. - * @ign: Ignored. + * @pme_poll_reset: Whether or not to reset the device's pme_poll flag. * * Check if @dev has generated PME and queue a resume request for it in that * case. */ -static int pci_pme_wakeup(struct pci_dev *dev, void *ign) +static int pci_pme_wakeup(struct pci_dev *dev, void *pme_poll_reset) { + if (pme_poll_reset && dev->pme_poll) + dev->pme_poll = false; + if (pci_check_pme_status(dev)) { pci_wakeup_event(dev); pm_request_resume(&dev->dev); @@ -1428,7 +1431,7 @@ static int pci_pme_wakeup(struct pci_dev *dev, void *ign) void pci_pme_wakeup_bus(struct pci_bus *bus) { if (bus) - pci_walk_bus(bus, pci_pme_wakeup, NULL); + pci_walk_bus(bus, pci_pme_wakeup, (void *)true); } /** @@ -1446,30 +1449,25 @@ bool pci_pme_capable(struct pci_dev *dev, pci_power_t state) static void pci_pme_list_scan(struct work_struct *work) { - struct pci_pme_device *pme_dev; + struct pci_pme_device *pme_dev, *n; mutex_lock(&pci_pme_list_mutex); if (!list_empty(&pci_pme_list)) { - list_for_each_entry(pme_dev, &pci_pme_list, list) - pci_pme_wakeup(pme_dev->dev, NULL); - schedule_delayed_work(&pci_pme_work, msecs_to_jiffies(PME_TIMEOUT)); + list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) { + if (pme_dev->dev->pme_poll) { + pci_pme_wakeup(pme_dev->dev, NULL); + } else { + list_del(&pme_dev->list); + kfree(pme_dev); + } + } + if (!list_empty(&pci_pme_list)) + schedule_delayed_work(&pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); } mutex_unlock(&pci_pme_list_mutex); } -/** - * pci_external_pme - is a device an external PCI PME source? - * @dev: PCI device to check - * - */ - -static bool pci_external_pme(struct pci_dev *dev) -{ - if (pci_is_pcie(dev) || dev->bus->number == 0) - return false; - return true; -} - /** * pci_pme_active - enable or disable PCI device's PME# function * @dev: PCI device to handle. @@ -1503,7 +1501,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable) hit, and the power savings from the devices will still be a win. */ - if (pci_external_pme(dev)) { + if (dev->pme_poll) { struct pci_pme_device *pme_dev; if (enable) { pme_dev = kmalloc(sizeof(struct pci_pme_device), @@ -1821,6 +1819,7 @@ void pci_pm_init(struct pci_dev *dev) (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "", (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : ""); dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT; + dev->pme_poll = true; /* * Make device's PM flags reflect the wake-up capability, but * let the user space enable it to wake up the system as needed. diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 0057344a3fc..001f1b78f39 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -84,6 +84,9 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip PCIe devices in case we started from a root port. */ if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { + if (dev->pme_poll) + dev->pme_poll = false; + pci_wakeup_event(dev); pm_request_resume(&dev->dev); ret = true; @@ -142,6 +145,9 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) /* First, check if the PME is from the root port itself. */ if (port->devfn == devfn && port->bus->number == busnr) { + if (port->pme_poll) + port->pme_poll = false; + if (pci_check_pme_status(port)) { pm_request_resume(&port->dev); found = true; @@ -187,6 +193,9 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) /* The device is there, but we have to check its PME status. */ found = pci_check_pme_status(dev); if (found) { + if (dev->pme_poll) + dev->pme_poll = false; + pci_wakeup_event(dev); pm_request_resume(&dev->dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 4ff6d4e9455..176c981a90d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -273,6 +273,7 @@ struct pci_dev { unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ unsigned int pme_interrupt:1; + unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ -- cgit v1.2.3-18-g5258 From 6af8bef14d6fc9e4e52c83fd646412e9dedadd26 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 28 Sep 2011 19:40:53 -0400 Subject: PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes --- drivers/acpi/osl.c | 3 +- drivers/pci/hotplug/acpiphp_glue.c | 109 ++++++++++++++++++++++++++++++++----- include/acpi/acpiosxf.h | 2 + 3 files changed, 98 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index fa32f584229..f31c5c5f1b7 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -80,7 +80,8 @@ static acpi_osd_handler acpi_irq_handler; static void *acpi_irq_context; static struct workqueue_struct *kacpid_wq; static struct workqueue_struct *kacpi_notify_wq; -static struct workqueue_struct *kacpi_hotplug_wq; +struct workqueue_struct *kacpi_hotplug_wq; +EXPORT_SYMBOL(kacpi_hotplug_wq); struct acpi_res_list { resource_size_t start; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 220285760b6..596172b4ae9 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "../pci.h" #include "acpiphp.h" @@ -1149,15 +1150,35 @@ check_sub_bridges(acpi_handle handle, u32 lvl, void *context, void **rv) return AE_OK ; } -/** - * handle_hotplug_event_bridge - handle ACPI event on bridges - * @handle: Notify()'ed acpi_handle - * @type: Notify code - * @context: pointer to acpiphp_bridge structure - * - * Handles ACPI event notification on {host,p2p} bridges. - */ -static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *context) +struct acpiphp_hp_work { + struct work_struct work; + acpi_handle handle; + u32 type; + void *context; +}; + +static void alloc_acpiphp_hp_work(acpi_handle handle, u32 type, + void *context, + void (*func)(struct work_struct *work)) +{ + struct acpiphp_hp_work *hp_work; + int ret; + + hp_work = kmalloc(sizeof(*hp_work), GFP_KERNEL); + if (!hp_work) + return; + + hp_work->handle = handle; + hp_work->type = type; + hp_work->context = context; + + INIT_WORK(&hp_work->work, func); + ret = queue_work(kacpi_hotplug_wq, &hp_work->work); + if (!ret) + kfree(hp_work); +} + +static void _handle_hotplug_event_bridge(struct work_struct *work) { struct acpiphp_bridge *bridge; char objname[64]; @@ -1165,11 +1186,18 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont .pointer = objname }; struct acpi_device *device; int num_sub_bridges = 0; + struct acpiphp_hp_work *hp_work; + acpi_handle handle; + u32 type; + + hp_work = container_of(work, struct acpiphp_hp_work, work); + handle = hp_work->handle; + type = hp_work->type; if (acpi_bus_get_device(handle, &device)) { /* This bridge must have just been physically inserted */ handle_bridge_insertion(handle, type); - return; + goto out; } bridge = acpiphp_handle_to_bridge(handle); @@ -1180,7 +1208,7 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont if (!bridge && !num_sub_bridges) { err("cannot get bridge info\n"); - return; + goto out; } acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); @@ -1241,22 +1269,49 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont warn("notify_handler: unknown event type 0x%x for %s\n", type, objname); break; } + +out: + kfree(hp_work); /* allocated in handle_hotplug_event_bridge */ } /** - * handle_hotplug_event_func - handle ACPI event on functions (i.e. slots) + * handle_hotplug_event_bridge - handle ACPI event on bridges * @handle: Notify()'ed acpi_handle * @type: Notify code - * @context: pointer to acpiphp_func structure + * @context: pointer to acpiphp_bridge structure * - * Handles ACPI event notification on slots. + * Handles ACPI event notification on {host,p2p} bridges. */ -static void handle_hotplug_event_func(acpi_handle handle, u32 type, void *context) +static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, + void *context) +{ + /* + * Currently the code adds all hotplug events to the kacpid_wq + * queue when it should add hotplug events to the kacpi_hotplug_wq. + * The proper way to fix this is to reorganize the code so that + * drivers (dock, etc.) do not call acpi_os_execute(), etc. + * For now just re-add this work to the kacpi_hotplug_wq so we + * don't deadlock on hotplug actions. + */ + alloc_acpiphp_hp_work(handle, type, context, + _handle_hotplug_event_bridge); +} + +static void _handle_hotplug_event_func(struct work_struct *work) { struct acpiphp_func *func; char objname[64]; struct acpi_buffer buffer = { .length = sizeof(objname), .pointer = objname }; + struct acpiphp_hp_work *hp_work; + acpi_handle handle; + u32 type; + void *context; + + hp_work = container_of(work, struct acpiphp_hp_work, work); + handle = hp_work->handle; + type = hp_work->type; + context = hp_work->context; acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); @@ -1291,8 +1346,32 @@ static void handle_hotplug_event_func(acpi_handle handle, u32 type, void *contex warn("notify_handler: unknown event type 0x%x for %s\n", type, objname); break; } + + kfree(hp_work); /* allocated in handle_hotplug_event_func */ } +/** + * handle_hotplug_event_func - handle ACPI event on functions (i.e. slots) + * @handle: Notify()'ed acpi_handle + * @type: Notify code + * @context: pointer to acpiphp_func structure + * + * Handles ACPI event notification on slots. + */ +static void handle_hotplug_event_func(acpi_handle handle, u32 type, + void *context) +{ + /* + * Currently the code adds all hotplug events to the kacpid_wq + * queue when it should add hotplug events to the kacpi_hotplug_wq. + * The proper way to fix this is to reorganize the code so that + * drivers (dock, etc.) do not call acpi_os_execute(), etc. + * For now just re-add this work to the kacpi_hotplug_wq so we + * don't deadlock on hotplug actions. + */ + alloc_acpiphp_hp_work(handle, type, context, + _handle_hotplug_event_func); +} static acpi_status find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv) diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 4543b6f7586..83062ed0ef2 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -189,6 +189,8 @@ void acpi_os_fixed_event_count(u32 fixed_event_number); /* * Threads and Scheduling */ +extern struct workqueue_struct *kacpi_hotplug_wq; + acpi_thread_id acpi_os_get_thread_id(void); acpi_status -- cgit v1.2.3-18-g5258 From 78d090b0be3f072a3c95022771c35183af961aaa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Sep 2011 21:44:36 +0200 Subject: PCI / PM: Remove unnecessary error variable from acpi_dev_run_wake() The result returned by acpi_dev_run_wake() is always either -EINVAL or -ENODEV, while obviously it should return 0 on success. The problem is that the leftover error variable, that's not really used in the function, is initialized with -ENODEV and then returned without modification. To fix this issue remove the error variable from acpi_dev_run_wake() and make the function return 0 on success as appropriate. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index cd3c4f1cdf1..4ecb6408b0d 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -285,7 +285,6 @@ static int acpi_dev_run_wake(struct device *phys_dev, bool enable) { struct acpi_device *dev; acpi_handle handle; - int error = -ENODEV; if (!device_run_wake(phys_dev)) return -EINVAL; @@ -305,7 +304,7 @@ static int acpi_dev_run_wake(struct device *phys_dev, bool enable) acpi_disable_wakeup_device_power(dev); } - return error; + return 0; } static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable) -- cgit v1.2.3-18-g5258 From db3c33c6d3fa04ee46b491e9d75d0d3b4798d074 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 27 Sep 2011 15:57:13 +0200 Subject: PCI: Move ATS implementation into own file ATS does not depend on IOV support, so move the code into its own file. This file will also include support for the PRI and PASID capabilities later. Also give ATS its own Kconfig variable to allow selecting it without IOV support. Reviewed-by: Bjorn Helgaas Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 4 ++ drivers/pci/Makefile | 1 + drivers/pci/ats.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/iov.c | 142 -------------------------------------------- include/linux/pci-ats.h | 2 + 5 files changed, 162 insertions(+), 142 deletions(-) create mode 100644 drivers/pci/ats.c diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0fa466a91bf..1d8ce839586 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -71,9 +71,13 @@ config HT_IRQ If unsure say Y. +config PCI_ATS + bool + config PCI_IOV bool "PCI IOV support" depends on PCI + select PCI_ATS help I/O Virtualization is a PCI feature supported by some devices which allows them to create virtual devices which share their diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 6fadae3ad13..083a49fee56 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_PCI_MSI) += msi.o # Build the Hypertransport interrupt support obj-$(CONFIG_HT_IRQ) += htirq.o +obj-$(CONFIG_PCI_ATS) += ats.o obj-$(CONFIG_PCI_IOV) += iov.o # diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c new file mode 100644 index 00000000000..ae4bf87afb0 --- /dev/null +++ b/drivers/pci/ats.c @@ -0,0 +1,155 @@ +/* + * drivers/pci/ats.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao + * + * PCI Express I/O Virtualization (IOV) support. + * Address Translation Service 1.0 + */ + +#include +#include + +#include "pci.h" + +static int ats_alloc_one(struct pci_dev *dev, int ps) +{ + int pos; + u16 cap; + struct pci_ats *ats; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + ats = kzalloc(sizeof(*ats), GFP_KERNEL); + if (!ats) + return -ENOMEM; + + ats->pos = pos; + ats->stu = ps; + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; + dev->ats = ats; + + return 0; +} + +static void ats_free_one(struct pci_dev *dev) +{ + kfree(dev->ats); + dev->ats = NULL; +} + +/** + * pci_enable_ats - enable the ATS capability + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_ats(struct pci_dev *dev, int ps) +{ + int rc; + u16 ctrl; + + BUG_ON(dev->ats && dev->ats->is_enabled); + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + if (pdev->ats) + rc = pdev->ats->stu == ps ? 0 : -EINVAL; + else + rc = ats_alloc_one(pdev, ps); + + if (!rc) + pdev->ats->ref_cnt++; + mutex_unlock(&pdev->sriov->lock); + if (rc) + return rc; + } + + if (!dev->is_physfn) { + rc = ats_alloc_one(dev, ps); + if (rc) + return rc; + } + + ctrl = PCI_ATS_CTRL_ENABLE; + if (!dev->is_virtfn) + ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + dev->ats->is_enabled = 1; + + return 0; +} + +/** + * pci_disable_ats - disable the ATS capability + * @dev: the PCI device + */ +void pci_disable_ats(struct pci_dev *dev) +{ + u16 ctrl; + + BUG_ON(!dev->ats || !dev->ats->is_enabled); + + pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); + ctrl &= ~PCI_ATS_CTRL_ENABLE; + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + dev->ats->is_enabled = 0; + + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + pdev->ats->ref_cnt--; + if (!pdev->ats->ref_cnt) + ats_free_one(pdev); + mutex_unlock(&pdev->sriov->lock); + } + + if (!dev->is_physfn) + ats_free_one(dev); +} + +/** + * pci_ats_queue_depth - query the ATS Invalidate Queue Depth + * @dev: the PCI device + * + * Returns the queue depth on success, or negative on failure. + * + * The ATS spec uses 0 in the Invalidate Queue Depth field to + * indicate that the function can accept 32 Invalidate Request. + * But here we use the `real' values (i.e. 1~32) for the Queue + * Depth; and 0 indicates the function shares the Queue with + * other functions (doesn't exclusively own a Queue). + */ +int pci_ats_queue_depth(struct pci_dev *dev) +{ + int pos; + u16 cap; + + if (dev->is_virtfn) + return 0; + + if (dev->ats) + return dev->ats->qdep; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + + return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; +} diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 42fae477651..9b4e88c636f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -722,145 +722,3 @@ int pci_num_vf(struct pci_dev *dev) return dev->sriov->nr_virtfn; } EXPORT_SYMBOL_GPL(pci_num_vf); - -static int ats_alloc_one(struct pci_dev *dev, int ps) -{ - int pos; - u16 cap; - struct pci_ats *ats; - - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); - if (!pos) - return -ENODEV; - - ats = kzalloc(sizeof(*ats), GFP_KERNEL); - if (!ats) - return -ENOMEM; - - ats->pos = pos; - ats->stu = ps; - pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); - ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : - PCI_ATS_MAX_QDEP; - dev->ats = ats; - - return 0; -} - -static void ats_free_one(struct pci_dev *dev) -{ - kfree(dev->ats); - dev->ats = NULL; -} - -/** - * pci_enable_ats - enable the ATS capability - * @dev: the PCI device - * @ps: the IOMMU page shift - * - * Returns 0 on success, or negative on failure. - */ -int pci_enable_ats(struct pci_dev *dev, int ps) -{ - int rc; - u16 ctrl; - - BUG_ON(dev->ats && dev->ats->is_enabled); - - if (ps < PCI_ATS_MIN_STU) - return -EINVAL; - - if (dev->is_physfn || dev->is_virtfn) { - struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; - - mutex_lock(&pdev->sriov->lock); - if (pdev->ats) - rc = pdev->ats->stu == ps ? 0 : -EINVAL; - else - rc = ats_alloc_one(pdev, ps); - - if (!rc) - pdev->ats->ref_cnt++; - mutex_unlock(&pdev->sriov->lock); - if (rc) - return rc; - } - - if (!dev->is_physfn) { - rc = ats_alloc_one(dev, ps); - if (rc) - return rc; - } - - ctrl = PCI_ATS_CTRL_ENABLE; - if (!dev->is_virtfn) - ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); - pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); - - dev->ats->is_enabled = 1; - - return 0; -} - -/** - * pci_disable_ats - disable the ATS capability - * @dev: the PCI device - */ -void pci_disable_ats(struct pci_dev *dev) -{ - u16 ctrl; - - BUG_ON(!dev->ats || !dev->ats->is_enabled); - - pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); - ctrl &= ~PCI_ATS_CTRL_ENABLE; - pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); - - dev->ats->is_enabled = 0; - - if (dev->is_physfn || dev->is_virtfn) { - struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; - - mutex_lock(&pdev->sriov->lock); - pdev->ats->ref_cnt--; - if (!pdev->ats->ref_cnt) - ats_free_one(pdev); - mutex_unlock(&pdev->sriov->lock); - } - - if (!dev->is_physfn) - ats_free_one(dev); -} - -/** - * pci_ats_queue_depth - query the ATS Invalidate Queue Depth - * @dev: the PCI device - * - * Returns the queue depth on success, or negative on failure. - * - * The ATS spec uses 0 in the Invalidate Queue Depth field to - * indicate that the function can accept 32 Invalidate Request. - * But here we use the `real' values (i.e. 1~32) for the Queue - * Depth; and 0 indicates the function shares the Queue with - * other functions (doesn't exclusively own a Queue). - */ -int pci_ats_queue_depth(struct pci_dev *dev) -{ - int pos; - u16 cap; - - if (dev->is_virtfn) - return 0; - - if (dev->ats) - return dev->ats->qdep; - - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); - if (!pos) - return -ENODEV; - - pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); - - return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : - PCI_ATS_MAX_QDEP; -} diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 655824fa4c7..4eab42bf2af 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -1,6 +1,8 @@ #ifndef LINUX_PCI_ATS_H #define LINUX_PCI_ATS_H +#include + /* Address Translation Service */ struct pci_ats { int pos; /* capability position */ -- cgit v1.2.3-18-g5258 From d4c0636c2107010f0ef8c4dfbb1d6368ae3b3ed9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 27 Sep 2011 15:57:14 +0200 Subject: PCI: Export ATS functions to modules This patch makes the ATS functions usable for modules. They will be used by a module implementing some advanced AMD IOMMU features. Reviewed-by: Bjorn Helgaas Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- drivers/pci/ats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index ae4bf87afb0..5ceff3e16e1 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -90,6 +90,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps) return 0; } +EXPORT_SYMBOL_GPL(pci_enable_ats); /** * pci_disable_ats - disable the ATS capability @@ -120,6 +121,7 @@ void pci_disable_ats(struct pci_dev *dev) if (!dev->is_physfn) ats_free_one(dev); } +EXPORT_SYMBOL_GPL(pci_disable_ats); /** * pci_ats_queue_depth - query the ATS Invalidate Queue Depth @@ -153,3 +155,4 @@ int pci_ats_queue_depth(struct pci_dev *dev) return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : PCI_ATS_MAX_QDEP; } +EXPORT_SYMBOL_GPL(pci_ats_queue_depth); -- cgit v1.2.3-18-g5258 From c320b976d7837c561ce4aa49dfe0a64f0e527ce4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 27 Sep 2011 15:57:15 +0200 Subject: PCI: Add implementation for PRI capability Implement the necessary functions to handle PRI capabilities on PCIe devices. With PRI devices behind an IOMMU can signal page fault conditions to software and recover from such faults. Reviewed-by: Bjorn Helgaas Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 9 +++ drivers/pci/ats.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 42 ++++++++++++ include/linux/pci_regs.h | 12 ++++ 4 files changed, 230 insertions(+) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 1d8ce839586..fb1e9707f91 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -85,6 +85,15 @@ config PCI_IOV If unsure, say N. +config PCI_PRI + bool "PCI PRI support" + select PCI_ATS + help + PRI is the PCI Page Request Interface. It allows PCI devices that are + behind an IOMMU to recover from page faults. + + If unsure, say N. + config PCI_IOAPIC bool depends on PCI diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 5ceff3e16e1..bf892a025d4 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -2,9 +2,11 @@ * drivers/pci/ats.c * * Copyright (C) 2009 Intel Corporation, Yu Zhao + * Copyright (C) 2011 Advanced Micro Devices, * * PCI Express I/O Virtualization (IOV) support. * Address Translation Service 1.0 + * Page Request Interface added by Joerg Roedel */ #include @@ -156,3 +158,168 @@ int pci_ats_queue_depth(struct pci_dev *dev) PCI_ATS_MAX_QDEP; } EXPORT_SYMBOL_GPL(pci_ats_queue_depth); + +#ifdef CONFIG_PCI_PRI +/** + * pci_enable_pri - Enable PRI capability + * @ pdev: PCI device structure + * + * Returns 0 on success, negative value on error + */ +int pci_enable_pri(struct pci_dev *pdev, u32 reqs) +{ + u16 control, status; + u32 max_requests; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); + if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED)) + return -EBUSY; + + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests); + reqs = min(max_requests, reqs); + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs); + + control |= PCI_PRI_ENABLE; + pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_enable_pri); + +/** + * pci_disable_pri - Disable PRI capability + * @pdev: PCI device structure + * + * Only clears the enabled-bit, regardless of its former value + */ +void pci_disable_pri(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + control &= ~PCI_PRI_ENABLE; + pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); +} +EXPORT_SYMBOL_GPL(pci_disable_pri); + +/** + * pci_pri_enabled - Checks if PRI capability is enabled + * @pdev: PCI device structure + * + * Returns true if PRI is enabled on the device, false otherwise + */ +bool pci_pri_enabled(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return false; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + + return (control & PCI_PRI_ENABLE) ? true : false; +} +EXPORT_SYMBOL_GPL(pci_pri_enabled); + +/** + * pci_reset_pri - Resets device's PRI state + * @pdev: PCI device structure + * + * The PRI capability must be disabled before this function is called. + * Returns 0 on success, negative value on error. + */ +int pci_reset_pri(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + if (control & PCI_PRI_ENABLE) + return -EBUSY; + + control |= PCI_PRI_RESET; + + pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_reset_pri); + +/** + * pci_pri_stopped - Checks whether the PRI capability is stopped + * @pdev: PCI device structure + * + * Returns true if the PRI capability on the device is disabled and the + * device has no outstanding PRI requests, false otherwise. The device + * indicates this via the STOPPED bit in the status register of the + * capability. + * The device internal state can be cleared by resetting the PRI state + * with pci_reset_pri(). This can force the capability into the STOPPED + * state. + */ +bool pci_pri_stopped(struct pci_dev *pdev) +{ + u16 control, status; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return true; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); + + if (control & PCI_PRI_ENABLE) + return false; + + return (status & PCI_PRI_STATUS_STOPPED) ? true : false; +} +EXPORT_SYMBOL_GPL(pci_pri_stopped); + +/** + * pci_pri_status - Request PRI status of a device + * @pdev: PCI device structure + * + * Returns negative value on failure, status on success. The status can + * be checked against status-bits. Supported bits are currently: + * PCI_PRI_STATUS_RF: Response failure + * PCI_PRI_STATUS_UPRGI: Unexpected Page Request Group Index + * PCI_PRI_STATUS_STOPPED: PRI has stopped + */ +int pci_pri_status(struct pci_dev *pdev) +{ + u16 status, control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); + pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); + + /* Stopped bit is undefined when enable == 1, so clear it */ + if (control & PCI_PRI_ENABLE) + status &= ~PCI_PRI_STATUS_STOPPED; + + return status; +} +EXPORT_SYMBOL_GPL(pci_pri_status); +#endif /* CONFIG_PCI_PRI */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 4eab42bf2af..071395251ab 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -17,6 +17,7 @@ struct pci_ats { extern int pci_enable_ats(struct pci_dev *dev, int ps); extern void pci_disable_ats(struct pci_dev *dev); extern int pci_ats_queue_depth(struct pci_dev *dev); + /** * pci_ats_enabled - query the ATS status * @dev: the PCI device @@ -51,4 +52,45 @@ static inline int pci_ats_enabled(struct pci_dev *dev) #endif /* CONFIG_PCI_IOV */ +#ifdef CONFIG_PCI_PRI + +extern int pci_enable_pri(struct pci_dev *pdev, u32 reqs); +extern void pci_disable_pri(struct pci_dev *pdev); +extern bool pci_pri_enabled(struct pci_dev *pdev); +extern int pci_reset_pri(struct pci_dev *pdev); +extern bool pci_pri_stopped(struct pci_dev *pdev); +extern int pci_pri_status(struct pci_dev *pdev); + +#else /* CONFIG_PCI_PRI */ + +static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) +{ + return -ENODEV; +} + +static inline void pci_disable_pri(struct pci_dev *pdev) +{ +} + +static inline bool pci_pri_enabled(struct pci_dev *pdev) +{ + return false; +} + +static inline int pci_reset_pri(struct pci_dev *pdev) +{ + return -ENODEV; +} + +static inline bool pci_pri_stopped(struct pci_dev *pdev) +{ + return true; +} + +static inline int pci_pri_status(struct pci_dev *pdev) +{ + return -ENODEV; +} +#endif /* CONFIG_PCI_PRI */ + #endif /* LINUX_PCI_ATS_H*/ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e8840964aca..7fc32aff94d 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -663,6 +663,18 @@ #define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ #define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ +/* Page Request Interface */ +#define PCI_PRI_CAP 0x13 /* PRI capability ID */ +#define PCI_PRI_CONTROL_OFF 0x04 /* Offset of control register */ +#define PCI_PRI_STATUS_OFF 0x06 /* Offset of status register */ +#define PCI_PRI_ENABLE 0x0001 /* Enable mask */ +#define PCI_PRI_RESET 0x0002 /* Reset bit mask */ +#define PCI_PRI_STATUS_RF 0x0001 /* Request Failure */ +#define PCI_PRI_STATUS_UPRGI 0x0002 /* Unexpected PRG index */ +#define PCI_PRI_STATUS_STOPPED 0x0100 /* PRI Stopped */ +#define PCI_PRI_MAX_REQ_OFF 0x08 /* Cap offset for max reqs supported */ +#define PCI_PRI_ALLOC_REQ_OFF 0x0c /* Cap offset for max reqs allowed */ + /* Single Root I/O Virtualization */ #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ #define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ -- cgit v1.2.3-18-g5258 From 086ac11f6435c9dc2fe5025fc8ea3a1dbca273d6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 27 Sep 2011 15:57:16 +0200 Subject: PCI: Add support for PASID capability Devices supporting Process Address Space Identifiers (PASIDs) can use an IOMMU to access multiple IO address spaces at the same time. A PCIe device indicates support for this feature by implementing the PASID capability. This patch adds support for the capability to the Linux kernel. Reviewed-by: Bjorn Helgaas Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 13 ++++++ drivers/pci/ats.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 31 +++++++++++++ include/linux/pci_regs.h | 8 ++++ 4 files changed, 165 insertions(+) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index fb1e9707f91..cec66064ee4 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -94,6 +94,19 @@ config PCI_PRI If unsure, say N. +config PCI_PASID + bool "PCI PASID support" + depends on PCI + select PCI_ATS + help + Process Address Space Identifiers (PASIDs) can be used by PCI devices + to access more than one IO address space at the same time. To make + use of this feature an IOMMU is required which also supports PASIDs. + Select this option if you have such an IOMMU and want to compile the + driver for it into your kernel. + + If unsure, say N. + config PCI_IOAPIC bool depends on PCI diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index bf892a025d4..f727a09eb72 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -7,6 +7,7 @@ * PCI Express I/O Virtualization (IOV) support. * Address Translation Service 1.0 * Page Request Interface added by Joerg Roedel + * PASID support added by Joerg Roedel */ #include @@ -323,3 +324,115 @@ int pci_pri_status(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_pri_status); #endif /* CONFIG_PCI_PRI */ + +#ifdef CONFIG_PCI_PASID +/** + * pci_enable_pasid - Enable the PASID capability + * @pdev: PCI device structure + * @features: Features to enable + * + * Returns 0 on success, negative value on error. This function checks + * whether the features are actually supported by the device and returns + * an error if not. + */ +int pci_enable_pasid(struct pci_dev *pdev, int features) +{ + u16 control, supported; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control); + pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); + + if (!(supported & PCI_PASID_ENABLE)) + return -EINVAL; + + supported &= PCI_PASID_EXEC | PCI_PASID_PRIV; + + /* User wants to enable anything unsupported? */ + if ((supported & features) != features) + return -EINVAL; + + control = PCI_PASID_ENABLE | features; + + pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_enable_pasid); + +/** + * pci_disable_pasid - Disable the PASID capability + * @pdev: PCI device structure + * + */ +void pci_disable_pasid(struct pci_dev *pdev) +{ + u16 control = 0; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); + if (!pos) + return; + + pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); +} +EXPORT_SYMBOL_GPL(pci_disable_pasid); + +/** + * pci_pasid_features - Check which PASID features are supported + * @pdev: PCI device structure + * + * Returns a negative value when no PASI capability is present. + * Otherwise is returns a bitmask with supported features. Current + * features reported are: + * PCI_PASID_ENABLE - PASID capability can be enabled + * PCI_PASID_EXEC - Execute permission supported + * PCI_PASID_PRIV - Priviledged mode supported + */ +int pci_pasid_features(struct pci_dev *pdev) +{ + u16 supported; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); + + supported &= PCI_PASID_ENABLE | PCI_PASID_EXEC | PCI_PASID_PRIV; + + return supported; +} +EXPORT_SYMBOL_GPL(pci_pasid_features); + +#define PASID_NUMBER_SHIFT 8 +#define PASID_NUMBER_MASK (0x1f << PASID_NUMBER_SHIFT) +/** + * pci_max_pasid - Get maximum number of PASIDs supported by device + * @pdev: PCI device structure + * + * Returns negative value when PASID capability is not present. + * Otherwise it returns the numer of supported PASIDs. + */ +int pci_max_pasids(struct pci_dev *pdev) +{ + u16 supported; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); + + supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT; + + return (1 << supported); +} +EXPORT_SYMBOL_GPL(pci_max_pasids); +#endif /* CONFIG_PCI_PASID */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 071395251ab..e3d0b389024 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -93,4 +93,35 @@ static inline int pci_pri_status(struct pci_dev *pdev) } #endif /* CONFIG_PCI_PRI */ +#ifdef CONFIG_PCI_PASID + +extern int pci_enable_pasid(struct pci_dev *pdev, int features); +extern void pci_disable_pasid(struct pci_dev *pdev); +extern int pci_pasid_features(struct pci_dev *pdev); +extern int pci_max_pasids(struct pci_dev *pdev); + +#else /* CONFIG_PCI_PASID */ + +static inline int pci_enable_pasid(struct pci_dev *pdev, int features) +{ + return -EINVAL; +} + +static inline void pci_disable_pasid(struct pci_dev *pdev) +{ +} + +static inline int pci_pasid_features(struct pci_dev *pdev) +{ + return -EINVAL; +} + +static inline int pci_max_pasids(struct pci_dev *pdev) +{ + return -EINVAL; +} + +#endif /* CONFIG_PCI_PASID */ + + #endif /* LINUX_PCI_ATS_H*/ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7fc32aff94d..b5d9657f310 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -675,6 +675,14 @@ #define PCI_PRI_MAX_REQ_OFF 0x08 /* Cap offset for max reqs supported */ #define PCI_PRI_ALLOC_REQ_OFF 0x0c /* Cap offset for max reqs allowed */ +/* PASID capability */ +#define PCI_PASID_CAP 0x1b /* PASID capability ID */ +#define PCI_PASID_CAP_OFF 0x04 /* PASID feature register */ +#define PCI_PASID_CONTROL_OFF 0x06 /* PASID control register */ +#define PCI_PASID_ENABLE 0x01 /* Enable/Supported bit */ +#define PCI_PASID_EXEC 0x02 /* Exec permissions Enable/Supported */ +#define PCI_PASID_PRIV 0x04 /* Priviledge Mode Enable/Support */ + /* Single Root I/O Virtualization */ #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ #define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ -- cgit v1.2.3-18-g5258 From d387a8d66670371e6be3b6d6bde2e38b8cade076 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 14 Oct 2011 14:56:13 -0500 Subject: PCI: Workaround for Intel MPS errata Intel 5000 and 5100 series memory controllers have a known issue if read completion coalescing is enabled and the PCI-E Maximum Payload Size is set to 256B. To work around this issue, disable read completion coalescing in the memory controller and root complexes. Unfortunately, it must always be disabled, even if no 256B MPS devices are present, due to the possibility of one being hotplugged. Links to erratas: http://www.intel.com/content/dam/doc/specification-update/5000-chipset-memory-controller-hub-specification-update.pdf http://www.intel.com/content/dam/doc/specification-update/5100-memory-controller-hub-chipset-specification-update.pdf Thanks to Jesse Brandeburg and Ben Hutchings for providing insight into the problem. Tested-and-Reported-by: Avi Kivity Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e5aadf357ae..dca58b9ba8c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2836,6 +2836,75 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256); +/* Intel 5000 and 5100 Memory controllers have an errata with read completion + * coalescing (which is enabled by default on some BIOSes) and MPS of 256B. + * Since there is no way of knowing what the PCIE MPS on each fabric will be + * until all of the devices are discovered and buses walked, read completion + * coalescing must be disabled. Unfortunately, it cannot be re-enabled because + * it is possible to hotplug a device with MPS of 256B. + */ +static void __devinit quirk_intel_mc_errata(struct pci_dev *dev) +{ + int err; + u16 rcc; + + if (pcie_bus_config == PCIE_BUS_TUNE_OFF) + return; + + /* Intel errata specifies bits to change but does not say what they are. + * Keeping them magical until such time as the registers and values can + * be explained. + */ + err = pci_read_config_word(dev, 0x48, &rcc); + if (err) { + dev_err(&dev->dev, "Error attempting to read the read " + "completion coalescing register.\n"); + return; + } + + if (!(rcc & (1 << 10))) + return; + + rcc &= ~(1 << 10); + + err = pci_write_config_word(dev, 0x48, rcc); + if (err) { + dev_err(&dev->dev, "Error attempting to write the read " + "completion coalescing register.\n"); + return; + } + + pr_info_once("Read completion coalescing disabled due to hardware " + "errata relating to 256B MPS.\n"); +} +/* Intel 5000 series memory controllers and ports 2-7 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25c0, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d0, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d4, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d8, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e2, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e3, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e4, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e5, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e6, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e7, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f7, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f8, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f9, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25fa, quirk_intel_mc_errata); +/* Intel 5100 series memory controllers and ports 2-7 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65c0, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e2, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e3, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e4, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e5, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e6, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e7, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f7, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f8, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { -- cgit v1.2.3-18-g5258 From 62f392ea5b5f87b641e16e61a4cedda21ef7341f Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 14 Oct 2011 14:56:14 -0500 Subject: PCI: enable MPS "performance" setting to properly handle bridge MPS Rework the "performance" MPS option to configure the device MPS with the smaller of the device MPSS or the bridge MPS (which is assumed to be properly configured at this point to the largest allowable MPS based on its parent bus). Also, rework the MRRS setting to report an inability to set the MRRS to a valid setting. Signed-off-by: Jon Mason Acked-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6ab6bd3df4b..48294243985 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1363,31 +1363,25 @@ static int pcie_find_smpss(struct pci_dev *dev, void *data) static void pcie_write_mps(struct pci_dev *dev, int mps) { - int rc, dev_mpss; - - dev_mpss = 128 << dev->pcie_mpss; + int rc; if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { - if (dev->bus->self) { - dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", - 128 << dev->bus->self->pcie_mpss); + mps = 128 << dev->pcie_mpss; - /* For "MPS Force Max", the assumption is made that + if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && dev->bus->self) + /* For "Performance", the assumption is made that * downstream communication will never be larger than * the MRRS. So, the MPS only needs to be configured * for the upstream communication. This being the case, * walk from the top down and set the MPS of the child * to that of the parent bus. + * + * Configure the device MPS with the smaller of the + * device MPSS or the bridge MPS (which is assumed to be + * properly configured at this point to the largest + * allowable MPS based on its parent bus). */ - mps = 128 << dev->bus->self->pcie_mpss; - if (mps > dev_mpss) - dev_warn(&dev->dev, "MPS configured higher than" - " maximum supported by the device. If" - " a bus issue occurs, try running with" - " pci=pcie_bus_safe.\n"); - } - - dev->pcie_mpss = ffs(mps) - 8; + mps = min(mps, pcie_get_mps(dev->bus->self)); } rc = pcie_set_mps(dev, mps); @@ -1395,25 +1389,22 @@ static void pcie_write_mps(struct pci_dev *dev, int mps) dev_err(&dev->dev, "Failed attempting to set the MPS\n"); } -static void pcie_write_mrrs(struct pci_dev *dev, int mps) +static void pcie_write_mrrs(struct pci_dev *dev) { - int rc, mrrs, dev_mpss; + int rc, mrrs; /* In the "safe" case, do not configure the MRRS. There appear to be * issues with setting MRRS to 0 on a number of devices. */ - if (pcie_bus_config != PCIE_BUS_PERFORMANCE) return; - dev_mpss = 128 << dev->pcie_mpss; - /* For Max performance, the MRRS must be set to the largest supported * value. However, it cannot be configured larger than the MPS the - * device or the bus can support. This assumes that the largest MRRS - * available on the device cannot be smaller than the device MPSS. + * device or the bus can support. This should already be properly + * configured by a prior call to pcie_write_mps. */ - mrrs = min(mps, dev_mpss); + mrrs = pcie_get_mps(dev); /* MRRS is a R/W register. Invalid values can be written, but a * subsequent read will verify if the value is acceptable or not. @@ -1421,16 +1412,18 @@ static void pcie_write_mrrs(struct pci_dev *dev, int mps) * shrink the value until it is acceptable to the HW. */ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { - dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value" - " to %d. If any issues are encountered, please try " - "running with pci=pcie_bus_safe\n", mrrs); rc = pcie_set_readrq(dev, mrrs); - if (rc) - dev_err(&dev->dev, - "Failed attempting to set the MRRS\n"); + if (!rc) + break; + dev_warn(&dev->dev, "Failed attempting to set the MRRS\n"); mrrs /= 2; } + + if (mrrs < 128) + dev_err(&dev->dev, "MRRS was unable to be configured with a " + "safe value. If problems are experienced, try running " + "with pci=pcie_bus_safe.\n"); } static int pcie_bus_configure_set(struct pci_dev *dev, void *data) @@ -1444,7 +1437,7 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); pcie_write_mps(dev, mps); - pcie_write_mrrs(dev, mps); + pcie_write_mrrs(dev); dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); -- cgit v1.2.3-18-g5258 From a1c473aa11e61bc871be16279c9bf976acf22504 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 14 Oct 2011 14:56:15 -0500 Subject: pci: Clamp pcie_set_readrq() when using "performance" settings When configuring the PCIe settings for "performance", we allow parents to have a larger Max Payload Size than children and rely on children Max Read Request Size to not be larger than their own MPS to avoid having the host bridge generate responses they can't cope with. However, various drivers in Linux call pci_set_readrq() with arbitrary values, assuming this to be a simple performance tweak. This breaks under our "performance" configuration. Fix that by making sure the value programmed by pcie_set_readrq() is never larger than the configured MPS for that device. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7cd417e9405..6f45a73c6e9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3202,8 +3202,6 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) if (rq < 128 || rq > 4096 || !is_power_of_2(rq)) goto out; - v = (ffs(rq) - 8) << 12; - cap = pci_pcie_cap(dev); if (!cap) goto out; @@ -3211,6 +3209,22 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); if (err) goto out; + /* + * If using the "performance" PCIe config, we clamp the + * read rq size to the max packet size to prevent the + * host bridge generating requests larger than we can + * cope with + */ + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + int mps = pcie_get_mps(dev); + + if (mps < 0) + return mps; + if (mps < rq) + rq = mps; + } + + v = (ffs(rq) - 8) << 12; if ((ctl & PCI_EXP_DEVCTL_READRQ) != v) { ctl &= ~PCI_EXP_DEVCTL_READRQ; -- cgit v1.2.3-18-g5258 From a513a99a7cebfb452839cc09c9c0586f72d96414 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 14 Oct 2011 14:56:16 -0500 Subject: PCI: Clean-up MPS debug output Clean-up MPS debug output to make it a single line and aligned, thus making it more readable for a large number of buses and devices in a single system. Suggested by Benjamin Herrenschmidt Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 48294243985..04e74f48571 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1428,24 +1428,25 @@ static void pcie_write_mrrs(struct pci_dev *dev) static int pcie_bus_configure_set(struct pci_dev *dev, void *data) { - int mps = 128 << *(u8 *)data; + int mps, orig_mps; if (!pci_is_pcie(dev)) return 0; - dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", - pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + mps = 128 << *(u8 *)data; + orig_mps = pcie_get_mps(dev); pcie_write_mps(dev, mps); pcie_write_mrrs(dev); - dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", - pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + dev_info(&dev->dev, "PCI-E Max Payload Size set to %4d/%4d (was %4d), " + "Max Read Rq %4d\n", pcie_get_mps(dev), 128 << dev->pcie_mpss, + orig_mps, pcie_get_readrq(dev)); return 0; } -/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down, +/* pcie_bus_configure_settings requires that pci_walk_bus work in a top-down, * parents then children fashion. If this changes, then this code will not * work as designed. */ -- cgit v1.2.3-18-g5258