aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c225
1 files changed, 177 insertions, 48 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 74a5a5773b1..de19edeaa7a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/crash_dump.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
@@ -21,6 +22,7 @@
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
+#include <linux/memblock.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -70,6 +72,16 @@ define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);
+/*
+ * stdcix is only supposed to be used in hypervisor real mode as per
+ * the architecture spec
+ */
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+ __asm__ __volatile__("stdcix %0,0,%1"
+ : : "r" (val), "r" (paddr) : "memory");
+}
+
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
@@ -153,13 +165,23 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
rid_end = pe->rid + 1;
}
- /* Associate PE in PELT */
+ /*
+ * Associate PE in PELT. We need add the PE into the
+ * corresponding PELT-V as well. Otherwise, the error
+ * originated from the PE might contribute to other
+ * PEs.
+ */
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_MAP_PE);
if (rc) {
pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
return -ENXIO;
}
+
+ rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+ pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
+ if (rc)
+ pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
@@ -322,7 +344,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
pci_name(dev));
continue;
}
- pci_dev_get(dev);
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->dma_weight += pnv_ioda_dma_weight(dev);
@@ -440,24 +461,57 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
return;
pe = &phb->ioda.pe_array[pdn->pe_number];
+ WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}
+static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+ struct pci_dev *pdev, u64 dma_mask)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pnv_ioda_pe *pe;
+ uint64_t top;
+ bool bypass = false;
+
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return -ENODEV;;
+
+ pe = &phb->ioda.pe_array[pdn->pe_number];
+ if (pe->tce_bypass_enabled) {
+ top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+ bypass = (dma_mask >= top);
+ }
+
+ if (bypass) {
+ dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
+ set_dma_ops(&pdev->dev, &dma_direct_ops);
+ set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+ } else {
+ dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+ set_dma_ops(&pdev->dev, &dma_iommu_ops);
+ set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+ }
+ return 0;
+}
+
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base(&dev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
if (dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
}
-static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
- u64 *startp, u64 *endp)
+static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
+ struct iommu_table *tbl,
+ __be64 *startp, __be64 *endp, bool rm)
{
- u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+ __be64 __iomem *invalidate = rm ?
+ (__be64 __iomem *)pe->tce_inval_reg_phys :
+ (__be64 __iomem *)tbl->it_index;
unsigned long start, end, inc;
start = __pa(startp);
@@ -484,7 +538,10 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
mb(); /* Ensure above stores are visible */
while (start <= end) {
- __raw_writeq(start, invalidate);
+ if (rm)
+ __raw_rm_writeq(cpu_to_be64(start), invalidate);
+ else
+ __raw_writeq(cpu_to_be64(start), invalidate);
start += inc;
}
@@ -496,10 +553,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
struct iommu_table *tbl,
- u64 *startp, u64 *endp)
+ __be64 *startp, __be64 *endp, bool rm)
{
unsigned long start, end, inc;
- u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+ __be64 __iomem *invalidate = rm ?
+ (__be64 __iomem *)pe->tce_inval_reg_phys :
+ (__be64 __iomem *)tbl->it_index;
/* We'll invalidate DMA address in PE scope */
start = 0x2ul << 60;
@@ -515,22 +574,25 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
mb();
while (start <= end) {
- __raw_writeq(start, invalidate);
+ if (rm)
+ __raw_rm_writeq(cpu_to_be64(start), invalidate);
+ else
+ __raw_writeq(cpu_to_be64(start), invalidate);
start += inc;
}
}
void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- u64 *startp, u64 *endp)
+ __be64 *startp, __be64 *endp, bool rm)
{
struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
tce32_table);
struct pnv_phb *phb = pe->phb;
if (phb->type == PNV_PHB_IODA1)
- pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
+ pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
else
- pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
+ pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
}
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
@@ -602,16 +664,18 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
* errors, and on the first pass the data will be a relative
* bus number, print that out instead.
*/
- tbl->it_busno = 0;
- tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
- tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
- TCE_PCI_SWINV_PAIR;
+ pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+ tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+ 8);
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE |
+ TCE_PCI_SWINV_FREE |
+ TCE_PCI_SWINV_PAIR);
}
iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+ iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
+ set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -624,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
+static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+{
+ struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+ tce32_table);
+ uint16_t window_id = (pe->pe_number << 1 ) + 1;
+ int64_t rc;
+
+ pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+ if (enable) {
+ phys_addr_t top = memblock_end_of_DRAM();
+
+ top = roundup_pow_of_two(top);
+ rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+ pe->pe_number,
+ window_id,
+ pe->tce_bypass_base,
+ top);
+ } else {
+ rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+ pe->pe_number,
+ window_id,
+ pe->tce_bypass_base,
+ 0);
+
+ /*
+ * We might want to reset the DMA ops of all devices on
+ * this PE. However in theory, that shouldn't be necessary
+ * as this is used for VFIO/KVM pass-through and the device
+ * hasn't yet been returned to its kernel driver
+ */
+ }
+ if (rc)
+ pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+ else
+ pe->tce_bypass_enabled = enable;
+}
+
+static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
+{
+ /* TVE #1 is selected by PCI address bit 59 */
+ pe->tce_bypass_base = 1ull << 59;
+
+ /* Install set_bypass callback for VFIO */
+ pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+
+ /* Enable bypass by default */
+ pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+}
+
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe)
{
@@ -680,17 +794,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
* errors, and on the first pass the data will be a relative
* bus number, print that out instead.
*/
- tbl->it_busno = 0;
- tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
- tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+ pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+ tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+ 8);
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
}
iommu_init_table(tbl, phb->hose->node);
+ iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
+ set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ /* Also create a bypass window */
+ pnv_pci_ioda2_setup_bypass_pe(phb, pe);
return;
fail:
if (pe->tce32_seg >= 0)
@@ -786,8 +904,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
struct irq_data *idata;
struct irq_chip *ichip;
unsigned int xive_num = hwirq - phb->msi_base;
- uint64_t addr64;
- uint32_t addr32, data;
+ __be32 data;
int rc;
/* No PE assigned ? bail out ... no MSI for you ! */
@@ -811,6 +928,8 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
}
if (is_64) {
+ __be64 addr64;
+
rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
&addr64, &data);
if (rc) {
@@ -818,9 +937,11 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
pci_name(dev), rc);
return -EIO;
}
- msg->address_hi = addr64 >> 32;
- msg->address_lo = addr64 & 0xfffffffful;
+ msg->address_hi = be64_to_cpu(addr64) >> 32;
+ msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
} else {
+ __be32 addr32;
+
rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
&addr32, &data);
if (rc) {
@@ -829,9 +950,9 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
return -EIO;
}
msg->address_hi = 0;
- msg->address_lo = addr32;
+ msg->address_lo = be32_to_cpu(addr32);
}
- msg->data = data;
+ msg->data = be32_to_cpu(data);
/*
* Change the IRQ chip for the MSI interrupts on PHB3.
@@ -1105,9 +1226,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
{
struct pci_controller *hose;
struct pnv_phb *phb;
- unsigned long size, m32map_off, iomap_off, pemap_off;
- const u64 *prop64;
- const u32 *prop32;
+ unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+ const __be64 *prop64;
+ const __be32 *prop32;
int len;
u64 phb_id;
void *aux;
@@ -1142,8 +1263,8 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
spin_lock_init(&phb->lock);
prop32 = of_get_property(np, "bus-range", &len);
if (prop32 && len == 8) {
- hose->first_busno = prop32[0];
- hose->last_busno = prop32[1];
+ hose->first_busno = be32_to_cpu(prop32[0]);
+ hose->last_busno = be32_to_cpu(prop32[1]);
} else {
pr_warn(" Broken <bus-range> on %s\n", np->full_name);
hose->first_busno = 0;
@@ -1171,12 +1292,13 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
pr_err(" Failed to map registers !\n");
/* Initialize more IODA stuff */
+ phb->ioda.total_pe = 1;
prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
- if (!prop32)
- phb->ioda.total_pe = 1;
- else
- phb->ioda.total_pe = *prop32;
-
+ if (prop32)
+ phb->ioda.total_pe = be32_to_cpup(prop32);
+ prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
+ if (prop32)
+ phb->ioda.reserved_pe = be32_to_cpup(prop32);
phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
/* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
@@ -1191,7 +1313,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
m32map_off = size;
size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
- iomap_off = size;
if (phb->type == PNV_PHB_IODA1) {
iomap_off = size;
size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
@@ -1205,7 +1326,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (phb->type == PNV_PHB_IODA1)
phb->ioda.io_segmap = aux + iomap_off;
phb->ioda.pe_array = aux + pemap_off;
- set_bit(0, phb->ioda.pe_alloc);
+ set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
@@ -1230,8 +1351,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
segment_size);
#endif
- pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
+ pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
+ " IO: 0x%x [segment=0x%x]\n",
phb->ioda.total_pe,
+ phb->ioda.reserved_pe,
phb->ioda.m32_size, phb->ioda.m32_segsize,
phb->ioda.io_size, phb->ioda.io_segsize);
@@ -1245,6 +1368,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+ phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
/* Setup shutdown function for kexec */
phb->shutdown = pnv_pci_ioda_shutdown;
@@ -1262,6 +1386,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
+ ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
/* Reset IODA tables to a clean state */
@@ -1269,12 +1394,16 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
- /*
- * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
- * has cleared the RTT which has the same effect
+ /* If we're running in kdump kerenl, the previous kerenl never
+ * shutdown PCI devices correctly. We already got IODA table
+ * cleaned out. So we have to issue PHB reset to stop all PCI
+ * transactions from previous kerenl.
*/
- if (ioda_type == PNV_PHB_IODA1)
- opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+ if (is_kdump_kernel()) {
+ pr_info(" Issue PHB reset ...\n");
+ ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+ ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
+ }
}
void __init pnv_pci_init_ioda2_phb(struct device_node *np)
@@ -1285,7 +1414,7 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
struct device_node *phbn;
- const u64 *prop64;
+ const __be64 *prop64;
u64 hub_id;
pr_info("Probing IODA IO-Hub %s\n", np->full_name);