diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-20 10:07:25 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-20 10:07:25 -0800 |
commit | 787314c35fbb97e02823a1b8eb8cfa58f366cd49 (patch) | |
tree | 3fe5a484c1846c80361217a726997484533e8344 | |
parent | 6491d4d02893d9787ba67279595990217177b351 (diff) | |
parent | 9c6ecf6a3ade2dc4b03a239af68058b22897af41 (diff) |
Merge tag 'iommu-updates-v3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
"A few new features this merge-window. The most important one is
probably that dma-debug now warns if a dma-handle is not checked with
dma_mapping_error by the device driver. This requires minor changes
to some architectures which make use of dma-debug. Most of these
changes have the respective Acks by the Arch-Maintainers.
Besides that there are updates to the AMD IOMMU driver to refactor
the IOMMU-Groups support and to make sure it does not trigger a
hardware erratum.
The OMAP changes (for which I pulled in a branch from Tony Lindgren's
tree) have a conflict in linux-next with the arm-soc tree. The
conflict is in the file arch/arm/mach-omap2/clock44xx_data.c which is
deleted in the arm-soc tree. It is safe to delete the file as well to
solve the conflict. Similar changes are done in the arm-soc tree in
the common clock framework migration. A missing hunk from the patch
in the IOMMU tree will be submitted as a separate patch when the
merge-window is closed."
* tag 'iommu-updates-v3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (29 commits)
ARM: dma-mapping: support debug_dma_mapping_error
ARM: OMAP4: hwmod data: ipu and dsp to use parent clocks instead of leaf clocks
iommu/omap: Adapt to runtime pm
iommu/omap: Migrate to hwmod framework
iommu/omap: Keep mmu enabled when requested
iommu/omap: Remove redundant clock handling on ISR
iommu/amd: Remove obsolete comment
iommu/amd: Don't use 512GB pages
iommu/tegra: smmu: Move bus_set_iommu after probe for multi arch
iommu/tegra: gart: Move bus_set_iommu after probe for multi arch
iommu/tegra: smmu: Remove unnecessary PTC/TLB flush all
tile: dma_debug: add debug_dma_mapping_error support
sh: dma_debug: add debug_dma_mapping_error support
powerpc: dma_debug: add debug_dma_mapping_error support
mips: dma_debug: add debug_dma_mapping_error support
microblaze: dma-mapping: support debug_dma_mapping_error
ia64: dma_debug: add debug_dma_mapping_error support
c6x: dma_debug: add debug_dma_mapping_error support
ARM64: dma_debug: add debug_dma_mapping_error support
intel-iommu: Prevent devices with RMRRs from being placed into SI Domain
...
27 files changed, 477 insertions, 272 deletions
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index a0b6250add7..4a4fb295cee 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -468,11 +468,46 @@ To map a single region, you do: size_t size = buffer->len; dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } and to unmap it: dma_unmap_single(dev, dma_handle, size, direction); +You should call dma_mapping_error() as dma_map_single() could fail and return +error. Not all dma implementations support dma_mapping_error() interface. +However, it is a good practice to call dma_mapping_error() interface, which +will invoke the generic mapping error check interface. Doing so will ensure +that the mapping code will work correctly on all dma implementations without +any dependency on the specifics of the underlying implementation. Using the +returned address without checking for errors could result in failures ranging +from panics to silent data corruption. Couple of example of incorrect ways to +check for errors that make assumptions about the underlying dma implementation +are as follows and these are applicable to dma_map_page() as well. + +Incorrect example 1: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) { + goto map_error; + } + +Incorrect example 2: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_handle == DMA_ERROR_CODE) { + goto map_error; + } + You should call dma_unmap_single when the DMA activity is finished, e.g. from the interrupt which told you that the DMA transfer is done. 
@@ -489,6 +524,14 @@ Specifically: size_t size = buffer->len; dma_handle = dma_map_page(dev, page, offset, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } ... @@ -496,6 +539,12 @@ Specifically: Here, "offset" means byte offset within the given page. +You should call dma_mapping_error() as dma_map_page() could fail and return +error as outlined under the dma_map_single() discussion. + +You should call dma_unmap_page when the DMA activity is finished, e.g. +from the interrupt which told you that the DMA transfer is done. + With scatterlists, you map a region gathered from several regions by: int i, count = dma_map_sg(dev, sglist, nents, direction); @@ -578,6 +627,14 @@ to use the dma_sync_*() interfaces. dma_addr_t mapping; mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } cp->rx_buf = buffer; cp->rx_len = len; @@ -658,6 +715,75 @@ failure can be determined by: * delay and try again later or * reset driver. */ + goto map_error_handling; + } + +- unmap pages that are already mapped, when mapping error occurs in the middle + of a multiple page mapping attempt. These example are applicable to + dma_map_page() as well. + +Example 1: + dma_addr_t dma_handle1; + dma_addr_t dma_handle2; + + dma_handle1 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle1)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling1; + } + dma_handle2 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle2)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. 
+ */ + goto map_error_handling2; + } + + ... + + map_error_handling2: + dma_unmap_single(dma_handle1); + map_error_handling1: + +Example 2: (if buffers are allocated a loop, unmap all mapped buffers when + mapping error is detected in the middle) + + dma_addr_t dma_addr; + dma_addr_t array[DMA_BUFFERS]; + int save_index = 0; + + for (i = 0; i < DMA_BUFFERS; i++) { + + ... + + dma_addr = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_addr)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } + array[i].dma_addr = dma_addr; + save_index++; + } + + ... + + map_error_handling: + + for (i = 0; i < save_index; i++) { + + ... + + dma_unmap_single(array[i].dma_addr); } Networking drivers must call dev_kfree_skb to free the socket buffer diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 66bd97a95f1..78a6c569d20 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number of preallocated entries is defined per architecture. If it is too low for you boot with 'dma_debug_entries=<your_desired_number>' to overwrite the architectural default. + +void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr); + +dma-debug interface debug_dma_mapping_error() to debug drivers that fail +to check dma mapping errors on addresses returned by dma_map_single() and +dma_map_page() interfaces. This interface clears a flag set by +debug_dma_map_page() to indicate that dma_mapping_error() has been called by +the driver. When driver does unmap, debug_dma_unmap() checks the flag and if +this flag is still set, prints warning message that includes call trace that +leads up to the unmap. This interface can be called from dma_mapping_error() +routines to enable dma mapping error check debugging. 
+ diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 67d06324e74..5b579b95150 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -91,6 +91,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return dma_addr == DMA_ERROR_CODE; } diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 4abb8b5e9bc..5e304d0719a 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -226,7 +226,7 @@ static struct platform_device omap3isp_device = { }; static struct omap_iommu_arch_data omap3_isp_iommu = { - .name = "isp", + .name = "mmu_isp", }; int omap3_init_camera(struct isp_platform_data *pdata) diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index a6a4ff8744b..7642fc4672c 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -12,153 +12,60 @@ #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/err.h> +#include <linux/slab.h> #include <linux/platform_data/iommu-omap.h> +#include <plat/omap_hwmod.h> +#include <plat/omap_device.h> -#include "soc.h" -#include "common.h" - -struct iommu_device { - resource_size_t base; - int irq; - struct iommu_platform_data pdata; - struct resource res[2]; -}; -static struct iommu_device *devices; -static int num_iommu_devices; - -#ifdef CONFIG_ARCH_OMAP3 -static struct iommu_device omap3_devices[] = { - { - .base = 0x480bd400, - .irq = 24 + OMAP_INTC_START, - .pdata = { - .name = "isp", - .nr_tlb_entries = 8, - .clk_name = "cam_ick", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -#if defined(CONFIG_OMAP_IOMMU_IVA2) - { - .base = 0x5d000000, - .irq = 28 + OMAP_INTC_START, - .pdata = { - .name = "iva2", - .nr_tlb_entries = 32, - .clk_name = "iva2_ck", - .da_start = 0x11000000, - 
.da_end = 0xFFFFF000, - }, - }, -#endif -}; -#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices) -static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES]; -#else -#define omap3_devices NULL -#define NR_OMAP3_IOMMU_DEVICES 0 -#define omap3_iommu_pdev NULL -#endif - -#ifdef CONFIG_ARCH_OMAP4 -static struct iommu_device omap4_devices[] = { - { - .base = OMAP4_MMU1_BASE, - .irq = 100 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "ducati", - .nr_tlb_entries = 32, - .clk_name = "ipu_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, - { - .base = OMAP4_MMU2_BASE, - .irq = 28 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "tesla", - .nr_tlb_entries = 32, - .clk_name = "dsp_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -}; -#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices) -static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES]; -#else -#define omap4_devices NULL -#define NR_OMAP4_IOMMU_DEVICES 0 -#define omap4_iommu_pdev NULL -#endif - -static struct platform_device **omap_iommu_pdev; - -static int __init omap_iommu_init(void) +static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) { - int i, err; - struct resource res[] = { - { .flags = IORESOURCE_MEM }, - { .flags = IORESOURCE_IRQ }, - }; + struct platform_device *pdev; + struct iommu_platform_data *pdata; + struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr; + static int i; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->name = oh->name; + pdata->nr_tlb_entries = a->nr_tlb_entries; + pdata->da_start = a->da_start; + pdata->da_end = a->da_end; + + if (oh->rst_lines_cnt == 1) { + pdata->reset_name = oh->rst_lines->name; + pdata->assert_reset = omap_device_assert_hardreset; + pdata->deassert_reset = omap_device_deassert_hardreset; + } - if (cpu_is_omap34xx()) { - devices = omap3_devices; - omap_iommu_pdev = omap3_iommu_pdev; - num_iommu_devices = NR_OMAP3_IOMMU_DEVICES; 
- } else if (cpu_is_omap44xx()) { - devices = omap4_devices; - omap_iommu_pdev = omap4_iommu_pdev; - num_iommu_devices = NR_OMAP4_IOMMU_DEVICES; - } else - return -ENODEV; + pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata), + NULL, 0, 0); - for (i = 0; i < num_iommu_devices; i++) { - struct platform_device *pdev; - const struct iommu_device *d = &devices[i]; + kfree(pdata); - pdev = platform_device_alloc("omap-iommu", i); - if (!pdev) { - err = -ENOMEM; - goto err_out; - } + if (IS_ERR(pdev)) { + pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev)); + return PTR_ERR(pdev); + } - res[0].start = d->base; - res[0].end = d->base + MMU_REG_SIZE - 1; - res[1].start = res[1].end = d->irq; + i++; - err = platform_device_add_resources(pdev, res, - ARRAY_SIZE(res)); - if (err) - goto err_out; - err = platform_device_add_data(pdev, &d->pdata, - sizeof(d->pdata)); - if (err) - goto err_out; - err = platform_device_add(pdev); - if (err) - goto err_out; - omap_iommu_pdev[i] = pdev; - } return 0; +} -err_out: - while (i--) - platform_device_put(omap_iommu_pdev[i]); - return err; +static int __init omap_iommu_init(void) +{ + return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL); } /* must be ready before omap3isp is probed */ subsys_initcall(omap_iommu_init); static void __exit omap_iommu_exit(void) { - int i; - - for (i = 0; i < num_iommu_devices; i++) - platform_device_unregister(omap_iommu_pdev[i]); + /* Do nothing */ } module_exit(omap_iommu_exit); diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 272b0178dba..f9fab942d5b 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -653,7 +653,7 @@ static struct omap_hwmod omap44xx_dsp_hwmod = { .mpu_irqs = omap44xx_dsp_irqs, .rst_lines = omap44xx_dsp_resets, .rst_lines_cnt = ARRAY_SIZE(omap44xx_dsp_resets), - .main_clk = "dsp_fck", + .main_clk = "dpll_iva_m4x2_ck", .prcm = { 
.omap4 = { .clkctrl_offs = OMAP4_CM_TESLA_TESLA_CLKCTRL_OFFSET, @@ -1679,7 +1679,7 @@ static struct omap_hwmod omap44xx_ipu_hwmod = { .mpu_irqs = omap44xx_ipu_irqs, .rst_lines = omap44xx_ipu_resets, .rst_lines_cnt = ARRAY_SIZE(omap44xx_ipu_resets), - .main_clk = "ipu_fck", + .main_clk = "ducati_clk_mux_ck", .prcm = { .omap4 = { .clkctrl_offs = OMAP4_CM_DUCATI_DUCATI_CLKCTRL_OFFSET, diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 538f4b44db5..99477689419 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -50,6 +50,7 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dev_addr); return ops->mapping_error(dev, dev_addr); } diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 03579fd99db..3c694065030 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -32,6 +32,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return dma_addr == ~0; } diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 4f5e8148440..cf3ab7e784b 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -58,6 +58,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) { struct dma_map_ops *ops = platform_dma_get_ops(dev); + debug_dma_mapping_error(dev, daddr); return ops->mapping_error(dev, daddr); } diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 01d228286cb..46460f1c49c 100644 --- 
a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -114,6 +114,8 @@ static inline void __dma_sync(unsigned long paddr, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index be39a12901c..006b43e38a9 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -40,6 +40,8 @@ static inline int dma_supported(struct device *dev, u64 mask) static inline int dma_mapping_error(struct device *dev, u64 mask) { struct dma_map_ops *ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, mask); return ops->mapping_error(dev, mask); } diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 78160874809..e27e9ad6818 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -172,6 +172,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *dma_ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 8bd965e00a1..b437f2c780b 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -46,6 +46,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 8493fd3c7ba..05fe53f5346 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ 
b/arch/sparc/include/asm/dma-mapping.h @@ -59,6 +59,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return (dma_addr == DMA_ERROR_CODE); } diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 4b6247d1a31..f2ff191376b 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -72,6 +72,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return get_dma_ops(dev)->mapping_error(dev, dma_addr); } diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f7b4c7903e7..808dae63eee 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 55074cba20e..c1c74e030a5 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -57,17 +57,9 @@ * physically contiguous memory regions it is mapping into page sizes * that we support. * - * Traditionally the IOMMU core just handed us the mappings directly, - * after making sure the size is an order of a 4KiB page and that the - * mapping has natural alignment. - * - * To retain this behavior, we currently advertise that we support - * all page sizes that are an order of 4KiB. 
- * - * If at some point we'd like to utilize the IOMMU core's new behavior, - * we could change this to advertise the real page sizes we support. + * 512GB Pages are not supported due to a hardware bug */ -#define AMD_IOMMU_PGSIZES (~0xFFFUL) +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -140,6 +132,9 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + if (dev_data->group) + iommu_group_put(dev_data->group); + kfree(dev_data); } @@ -274,41 +269,23 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) *from = to; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - -static int iommu_init_device(struct device *dev) +static struct pci_bus *find_hosted_bus(struct pci_bus *bus) { - struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); - struct iommu_dev_data *dev_data; - struct iommu_group *group; - u16 alias; - int ret; - - if (dev->archdata.iommu) - return 0; - - dev_data = find_dev_data(get_device_id(dev)); - if (!dev_data) - return -ENOMEM; - - alias = amd_iommu_alias_table[dev_data->devid]; - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + return ERR_PTR(-ENODEV); + } - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; + return bus; +} - dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - if (dma_pdev == NULL) - dma_pdev = pci_dev_get(pdev); +static struct pci_dev *get_isolation_root(struct pci_dev *pdev) +{ + struct pci_dev *dma_pdev = pdev; /* Account for quirked devices */ swap_pci_ref(&dma_pdev, 
pci_get_dma_source(dma_pdev)); @@ -330,14 +307,9 @@ static int iommu_init_device(struct device *dev) * Finding the next device may require skipping virtual buses. */ while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } + struct pci_bus *bus = find_hosted_bus(dma_pdev->bus); + if (IS_ERR(bus)) + break; if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; @@ -345,19 +317,137 @@ static int iommu_init_device(struct device *dev) swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } -root_bus: - group = iommu_group_get(&dma_pdev->dev); - pci_dev_put(dma_pdev); + return dma_pdev; +} + +static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev) +{ + struct iommu_group *group = iommu_group_get(&pdev->dev); + int ret; + if (!group) { group = iommu_group_alloc(); if (IS_ERR(group)) return PTR_ERR(group); + + WARN_ON(&pdev->dev != dev); } ret = iommu_group_add_device(group, dev); - iommu_group_put(group); + return ret; +} + +static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data, + struct device *dev) +{ + if (!dev_data->group) { + struct iommu_group *group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + + dev_data->group = group; + } + + return iommu_group_add_device(dev_data->group, dev); +} + +static int init_iommu_group(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct iommu_group *group; + struct pci_dev *dma_pdev; + int ret; + + group = iommu_group_get(dev); + if (group) { + iommu_group_put(group); + return 0; + } + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + if (dev_data->alias_data) { + u16 alias; + struct pci_bus *bus; + + if (dev_data->alias_data->group) + goto use_group; + + /* + * If the alias device exists, it's effectively just a first + * level quirk for finding the DMA source. 
+ */ + alias = amd_iommu_alias_table[dev_data->devid]; + dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); + if (dma_pdev) { + dma_pdev = get_isolation_root(dma_pdev); + goto use_pdev; + } + + /* + * If the alias is virtual, try to find a parent device + * and test whether the IOMMU group is actualy rooted above + * the alias. Be careful to also test the parent device if + * we think the alias is the root of the group. + */ + bus = pci_find_bus(0, alias >> 8); + if (!bus) + goto use_group; + + bus = find_hosted_bus(bus); + if (IS_ERR(bus) || !bus->self) + goto use_group; + + dma_pdev = get_isolation_root(pci_dev_get(bus->self)); + if (dma_pdev != bus->self || (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) + goto use_pdev; + + pci_dev_put(dma_pdev); + goto use_group; + } + + dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); +use_pdev: + ret = use_pdev_iommu_group(dma_pdev, dev); + pci_dev_put(dma_pdev); + return ret; +use_group: + return use_dev_data_iommu_group(dev_data->alias_data, dev); +} + +static int iommu_init_device(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iommu_dev_data *dev_data; + u16 alias; + int ret; + + if (dev->archdata.iommu) + return 0; + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + alias = amd_iommu_alias_table[dev_data->devid]; + if (alias != dev_data->devid) { + struct iommu_dev_data *alias_data; + + alias_data = find_dev_data(alias); + if (alias_data == NULL) { + pr_err("AMD-Vi: Warning: Unhandled device %s\n", + dev_name(dev)); + free_dev_data(dev_data); + return -ENOTSUPP; + } + dev_data->alias_data = alias_data; + } + ret = init_iommu_group(dev); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9aa3d079ff..e38ab438bb3 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -426,6 +426,7 @@ struct iommu_dev_data { struct iommu_dev_data 
*alias_data;/* The alias dev_ |