diff options
55 files changed, 945 insertions, 733 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index a8d01005f48..10a93696e55 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -82,7 +82,19 @@ Most of the hard work is done for the driver in the PCI layer. It simply has to request that the PCI layer set up the MSI capability for this device. -4.2.1 pci_enable_msi_range +4.2.1 pci_enable_msi + +int pci_enable_msi(struct pci_dev *dev) + +A successful call allocates ONE interrupt to the device, regardless +of how many MSIs the device supports. The device is switched from +pin-based interrupt mode to MSI mode. The dev->irq number is changed +to a new number which represents the message signaled interrupt; +consequently, this function should be called before the driver calls +request_irq(), because an MSI is delivered via a vector that is +different from the vector of a pin-based interrupt. + +4.2.2 pci_enable_msi_range int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) @@ -147,6 +159,11 @@ static int foo_driver_enable_msi(struct pci_dev *pdev, int nvec) return pci_enable_msi_range(pdev, nvec, nvec); } +Note, unlike pci_enable_msi_exact() function, which could be also used to +enable a particular number of MSI-X interrupts, pci_enable_msi_range() +returns either a negative errno or 'nvec' (not negative errno or 0 - as +pci_enable_msi_exact() does). + 4.2.1.3 Single MSI mode The most notorious example of the request type described above is @@ -158,7 +175,27 @@ static int foo_driver_enable_single_msi(struct pci_dev *pdev) return pci_enable_msi_range(pdev, 1, 1); } -4.2.2 pci_disable_msi +Note, unlike pci_enable_msi() function, which could be also used to +enable the single MSI mode, pci_enable_msi_range() returns either a +negative errno or 1 (not negative errno or 0 - as pci_enable_msi() +does). + +4.2.3 pci_enable_msi_exact + +int pci_enable_msi_exact(struct pci_dev *dev, int nvec) + +This variation on pci_enable_msi_range() call allows a device driver to +request exactly 'nvec' MSIs. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. + +By contrast with pci_enable_msi_range() function, pci_enable_msi_exact() +returns zero in case of success, which indicates MSI interrupts have been +successfully allocated. + +4.2.4 pci_disable_msi void pci_disable_msi(struct pci_dev *dev) @@ -172,7 +209,7 @@ on any interrupt for which it previously called request_irq(). Failure to do so results in a BUG_ON(), leaving the device with MSI enabled and thus leaking its vector. -4.2.3 pci_msi_vec_count +4.2.4 pci_msi_vec_count int pci_msi_vec_count(struct pci_dev *dev) @@ -257,8 +294,8 @@ possible, likely up to the limit returned by pci_msix_vec_count() function: static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) { - return pci_enable_msi_range(adapter->pdev, adapter->msix_entries, - 1, nvec); + return pci_enable_msix_range(adapter->pdev, adapter->msix_entries, + 1, nvec); } Note the value of 'minvec' parameter is 1. As 'minvec' is inclusive, @@ -269,8 +306,8 @@ In this case the function could look like this: static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) { - return pci_enable_msi_range(adapter->pdev, adapter->msix_entries, - FOO_DRIVER_MINIMUM_NVEC, nvec); + return pci_enable_msix_range(adapter->pdev, adapter->msix_entries, + FOO_DRIVER_MINIMUM_NVEC, nvec); } 4.3.1.2 Exact number of MSI-X interrupts @@ -282,10 +319,15 @@ parameters: static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) { - return pci_enable_msi_range(adapter->pdev, adapter->msix_entries, - nvec, nvec); + return pci_enable_msix_range(adapter->pdev, adapter->msix_entries, + nvec, nvec); } +Note, unlike pci_enable_msix_exact() function, which could be also used to +enable a particular number of MSI-X interrupts, pci_enable_msix_range() +returns either a negative errno or 'nvec' (not negative errno or 0 - as +pci_enable_msix_exact() does). + 4.3.1.3 Specific requirements to the number of MSI-X interrupts As noted above, there could be devices that can not operate with just any @@ -332,7 +374,64 @@ Note how pci_enable_msix_range() return value is analized for a fallback - any error code other than -ENOSPC indicates a fatal error and should not be retried. -4.3.2 pci_disable_msix +4.3.2 pci_enable_msix_exact + +int pci_enable_msix_exact(struct pci_dev *dev, + struct msix_entry *entries, int nvec) + +This variation on pci_enable_msix_range() call allows a device driver to +request exactly 'nvec' MSI-Xs. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to allocate any more MSI-X interrupts for +this device. + +By contrast with pci_enable_msix_range() function, pci_enable_msix_exact() +returns zero in case of success, which indicates MSI-X interrupts have been +successfully allocated. + +Another version of a routine that enables MSI-X mode for a device with +specific requirements described in chapter 4.3.1.3 might look like this: + +/* + * Assume 'minvec' and 'maxvec' are non-zero + */ +static int foo_driver_enable_msix(struct foo_adapter *adapter, + int minvec, int maxvec) +{ + int rc; + + minvec = roundup_pow_of_two(minvec); + maxvec = rounddown_pow_of_two(maxvec); + + if (minvec > maxvec) + return -ERANGE; + +retry: + rc = pci_enable_msix_exact(adapter->pdev, + adapter->msix_entries, maxvec); + + /* + * -ENOSPC is the only error code allowed to be analyzed + */ + if (rc == -ENOSPC) { + if (maxvec == 1) + return -ENOSPC; + + maxvec /= 2; + + if (minvec > maxvec) + return -ENOSPC; + + goto retry; + } else if (rc < 0) { + return rc; + } + + return maxvec; +} + +4.3.3 pci_disable_msix void pci_disable_msix(struct pci_dev *dev) diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt index 86551cc72e0..2d91ae25198 100644 --- a/Documentation/PCI/pci-iov-howto.txt +++ b/Documentation/PCI/pci-iov-howto.txt @@ -68,10 +68,6 @@ To disable SR-IOV capability: echo 0 > \ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs -To notify SR-IOV core of Virtual Function Migration: -(a) In the driver: - irqreturn_t pci_sriov_migration(struct pci_dev *dev); - 3.2 Usage example Following piece of code illustrates the usage of the SR-IOV API. diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 317da88ae65..d0d46786892 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -19,7 +19,7 @@ static int debug_pci; /* - * We can't use pci_find_device() here since we are + * We can't use pci_get_device() here since we are * called from interrupt context. */ static void pcibios_bus_report_status(struct pci_bus *bus, u_int status_mask, int warn) @@ -57,13 +57,10 @@ static void pcibios_bus_report_status(struct pci_bus *bus, u_int status_mask, in void pcibios_report_status(u_int status_mask, int warn) { - struct list_head *l; - - list_for_each(l, &pci_root_buses) { - struct pci_bus *bus = pci_bus_b(l); + struct pci_bus *bus; + list_for_each_entry(bus, &pci_root_buses, node) pcibios_bus_report_status(bus, status_mask, warn); - } } /* diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index c2812176544..67b1d168575 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -88,7 +88,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) /* Depth-First Search on bus tree */ for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); + bus = list_entry(ln, struct pci_bus, node); if ((dev = bus->self)) { for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { r = &dev->resource[idx]; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 8e858b593e4..007361d59aa 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1140,11 +1140,13 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, #ifdef CONFIG_NUMA { + int node = ioc->node; struct page *page; - page = alloc_pages_exact_node(ioc->node == MAX_NUMNODES ? - numa_node_id() : ioc->node, flags, - get_order(size)); + if (node == NUMA_NO_NODE) + node = numa_node_id(); + + page = alloc_pages_exact_node(node, flags, get_order(size)); if (unlikely(!page)) return NULL; @@ -1914,7 +1916,7 @@ ioc_show(struct seq_file *s, void *v) seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); #ifdef CONFIG_NUMA - if (ioc->node != MAX_NUMNODES) + if (ioc->node != NUMA_NO_NODE) seq_printf(s, "NUMA node : %d\n", ioc->node); #endif seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); @@ -2015,31 +2017,19 @@ sba_connect_bus(struct pci_bus *bus) printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); } -#ifdef CONFIG_NUMA static void __init sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) { +#ifdef CONFIG_NUMA unsigned int node; - int pxm; - - ioc->node = MAX_NUMNODES; - - pxm = acpi_get_pxm(handle); - - if (pxm < 0) - return; - - node = pxm_to_node(pxm); - if (node >= MAX_NUMNODES || !node_online(node)) - return; + node = acpi_get_node(handle); + if (node != NUMA_NO_NODE && !node_online(node)) + node = NUMA_NO_NODE; ioc->node = node; - return; -} -#else -#define sba_map_ioc_to_node(ioc, handle) #endif +} static int __init acpi_sba_ioc_add(struct acpi_device *device, diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 71fbaaa495c..7d41cc08982 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -98,7 +98,7 @@ struct pci_controller { struct acpi_device *companion; void *iommu; int segment; - int node; /* nearest node with memory or -1 for global allocation */ + int node; /* nearest node with memory or NUMA_NO_NODE for global allocation */ void *platform_data; }; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 07d209c9507..5a585ebe9df 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -803,14 +803,9 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) * ACPI based hotplug CPU support */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -static -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA - int pxm_id; - int nid; - - pxm_id = acpi_get_pxm(handle); /* * We don't have cpu-only-node hotadd. But if the system equips * SRAT table, pxm is already found and node is ready. @@ -818,11 +813,10 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) * This code here is for the system which doesn't have full SRAT * table for possible cpus. */ - nid = acpi_map_pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; - node_cpuid[cpu].nid = nid; + node_cpuid[cpu].nid = acpi_get_node(handle); #endif - return (0); + return 0; } int additional_cpus __initdata = -1; @@ -929,7 +923,7 @@ static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, union acpi_object *obj; struct acpi_madt_io_sapic *iosapic; unsigned int gsi_base; - int pxm, node; + int node; /* Only care about objects w/ a method that returns the MADT */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) @@ -956,17 +950,9 @@ static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, kfree(buffer.pointer); - /* - * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell - * us which node to associate this with. - */ - pxm = acpi_get_pxm(handle); - if (pxm < 0) - return AE_OK; - - node = pxm_to_node(pxm); - - if (node >= MAX_NUMNODES || !node_online(node) || + /* OK, it's an IOSAPIC MADT entry; associate it with a node */ + node = acpi_get_node(handle); + if (node == NUMA_NO_NODE || !node_online(node) || cpumask_empty(cpumask_of_node(node))) return AE_OK; diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index 5dc969dd4ac..eee069a0b53 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/vgaarb.h> #include <asm/machvec.h> @@ -19,9 +20,10 @@ * IORESOURCE_ROM_SHADOW is used to associate the boot video * card with this copy. On laptops this copy has to be used since * the main ROM may be compressed or combined with another image. - * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW - * is marked here since the boot video device will be the only enabled - * video device at this point. + * See pci_map_rom() for use of this flag. Before marking the device + * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set + * by either arch cde or vga-arbitration, if so only apply the fixup to this + * already determined primary video card. */ static void pci_fixup_video(struct pci_dev *pdev) @@ -35,9 +37,6 @@ static void pci_fixup_video(struct pci_dev *pdev) return; /* Maybe, this machine supports legacy memory map. */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return; - /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { @@ -60,10 +59,14 @@ static void pci_fixup_video(struct pci_dev *pdev) } bus = bus->parent; } - pci_read_config_word(pdev, PCI_COMMAND, &config); - if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); + if (!vga_default_device() || pdev == vga_default_device()) { + pci_read_config_word(pdev, PCI_COMMAND, &config); + if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; + dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); + vga_set_default_device(pdev); + } } } -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 9e4938d8ca4..291a582777c 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -126,7 +126,6 @@ static struct pci_controller *alloc_pci_controller(int seg) return NULL; controller->segment = seg; - controller->node = -1; return controller; } @@ -430,19 +429,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) struct pci_root_info *info = NULL; int busnum = root->secondary.start; struct pci_bus *pbus; - int pxm, ret; + int ret; controller = alloc_pci_controller(domain); if (!controller) return NULL; controller->companion = device; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_NUMA - if (pxm >= 0) - controller->node = pxm_to_node(pxm); -#endif + controller->node = acpi_get_node(device->handle); info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index a9e311f7a9d..2a4779091a5 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -208,7 +208,6 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, unsigned long in_devfn) { struct pci_controller* hose; - struct list_head *ln; struct pci_bus *bus = NULL; struct device_node *hose_node; @@ -230,8 +229,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * used on pre-domains setup. We return the first match */ - for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { - bus = pci_bus_b(ln); + list_for_each_entry(bus, &pci_root_buses, node) { if (in_bus >= bus->number && in_bus <= bus->busn_res.end) break; bus = NULL; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index efe61374f6e..203cbf0dc10 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -37,15 +37,15 @@ find_bus_among_children(struct pci_bus *bus, struct device_node *dn) { struct pci_bus *child = NULL; - struct list_head *tmp; + struct pci_bus *tmp; struct device_node *busdn; busdn = pci_bus_to_OF_node(bus); if (busdn == dn) return bus; - list_for_each(tmp, &bus->children) { - child = find_bus_among_children(pci_bus_b(tmp), dn); + list_for_each_entry(tmp, &bus->children, node) { + child = find_bus_among_children(tmp, dn); if (child) break; }; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 1ac6114c9ea..96ae4f4040b 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -26,11 +26,6 @@ extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; -/* scan a bus after allocating a pci_sysdata for it */ -extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, - int node); -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); - #ifdef CONFIG_PCI #ifdef CONFIG_PCI_DOMAINS @@ -70,7 +65,7 @@ extern unsigned long pci_mem_start; extern int pcibios_enabled; void pcibios_config_init(void); -struct pci_bus *pcibios_scan_root(int bus); +void pcibios_scan_root(int bus); void pcibios_set_master(struct pci_dev *dev); void pcibios_penalize_isa_irq(int irq, int active); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e231c..c840571afa4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -131,6 +131,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) } struct pci_bus; +int x86_pci_root_bus_node(int bus); void x86_pci_root_bus_resources(int bus, struct list_head *resources); #ifdef CONFIG_SMP @@ -139,17 +140,4 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources); #define smt_capable() (smp_num_siblings > 1) #endif -#ifdef CONFIG_NUMA -extern int get_mp_bus_to_node(int busnum); -extern void set_mp_bus_to_node(int busnum, int node); -#else -static inline int get_mp_bus_to_node(int busnum) -{ - return 0; -} -static inline void set_mp_bus_to_node(int busnum, int node) -{ -} -#endif - #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 4f25ec07755..01edac6c5e1 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -218,9 +218,8 @@ static void teardown_mcfg_map(struct pci_root_info *info) } #endif -static acpi_status -resource_to_addr(struct acpi_resource *resource, - struct acpi_resource_address64 *addr) +static acpi_status resource_to_addr(struct acpi_resource *resource, + struct acpi_resource_address64 *addr) { acpi_status status; struct acpi_resource_memory24 *memory24; @@ -265,8 +264,7 @@ resource_to_addr(struct acpi_resource *resource, return AE_ERROR; } -static acpi_status -count_resource(struct acpi_resource *acpi_res, void *data) +static acpi_status count_resource(struct acpi_resource *acpi_res, void *data) { struct pci_root_info *info = data; struct acpi_resource_address64 addr; @@ -278,8 +276,7 @@ count_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } -static acpi_status -setup_resource(struct acpi_resource *acpi_res, void *data) +static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) { struct pci_root_info *info = data; struct resource *res; @@ -435,9 +432,9 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) __release_pci_root_info(info); } -static void -probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, - |