diff options
-rw-r--r-- | arch/mips/cavium-octeon/Kconfig | 85 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/Makefile | 16 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/csrc-octeon.c | 58 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/dma-octeon.c | 32 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/flash_setup.c | 84 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/octeon-irq.c | 497 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/octeon-memcpy.S | 521 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/serial.c | 136 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/setup.c | 929 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/smp.c | 211 | ||||
-rw-r--r-- | arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h | 78 | ||||
-rw-r--r-- | arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h | 64 | ||||
-rw-r--r-- | arch/mips/include/asm/mach-cavium-octeon/irq.h | 244 | ||||
-rw-r--r-- | arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h | 131 | ||||
-rw-r--r-- | arch/mips/include/asm/mach-cavium-octeon/war.h | 26 | ||||
-rw-r--r-- | arch/mips/include/asm/octeon/octeon.h | 248 | ||||
-rw-r--r-- | arch/mips/kernel/octeon_switch.S | 506 | ||||
-rw-r--r-- | arch/mips/mm/c-octeon.c | 307 | ||||
-rw-r--r-- | arch/mips/mm/cex-oct.S | 70 |
19 files changed, 4243 insertions, 0 deletions
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig new file mode 100644 index 00000000000..094c17e38e1 --- /dev/null +++ b/arch/mips/cavium-octeon/Kconfig @@ -0,0 +1,85 @@ +config CAVIUM_OCTEON_SPECIFIC_OPTIONS + bool "Enable Octeon specific options" + depends on CPU_CAVIUM_OCTEON + default "y" + +config CAVIUM_OCTEON_2ND_KERNEL + bool "Build the kernel to be used as a 2nd kernel on the same chip" + depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS + default "n" + help + This option configures this kernel to be linked at a different + address and use the 2nd uart for output. This allows a kernel built + with this option to be run at the same time as one built without this + option. + +config CAVIUM_OCTEON_HW_FIX_UNALIGNED + bool "Enable hardware fixups of unaligned loads and stores" + depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS + default "y" + help + Configure the Octeon hardware to automatically fix unaligned loads + and stores. Normally unaligned accesses are fixed using a kernel + exception handler. This option enables the hardware automatic fixups, + which requires only an extra 3 cycles. Disable this option if you + are running code that relies on address exceptions on unaligned + accesses. + +config CAVIUM_OCTEON_CVMSEG_SIZE + int "Number of L1 cache lines reserved for CVMSEG memory" + depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS + range 0 54 + default 1 + help + CVMSEG LM is a segment that accesses portions of the dcache as a + local memory; the larger CVMSEG is, the smaller the cache is. + This selects the size of CVMSEG LM, which is in cache blocks. The + legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is + between zero and 6192 bytes). + +config CAVIUM_OCTEON_LOCK_L2 + bool "Lock often used kernel code in the L2" + depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS + default "y" + help + Enable locking parts of the kernel into the L2 cache. + +config CAVIUM_OCTEON_LOCK_L2_TLB + bool "Lock the TLB handler in L2" + depends on CAVIUM_OCTEON_LOCK_L2 + default "y" + help + Lock the low level TLB fast path into L2. + +config CAVIUM_OCTEON_LOCK_L2_EXCEPTION + bool "Lock the exception handler in L2" + depends on CAVIUM_OCTEON_LOCK_L2 + default "y" + help + Lock the low level exception handler into L2. + +config CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT + bool "Lock the interrupt handler in L2" + depends on CAVIUM_OCTEON_LOCK_L2 + default "y" + help + Lock the low level interrupt handler into L2. + +config CAVIUM_OCTEON_LOCK_L2_INTERRUPT + bool "Lock the 2nd level interrupt handler in L2" + depends on CAVIUM_OCTEON_LOCK_L2 + default "y" + help + Lock the 2nd level interrupt handler in L2. + +config CAVIUM_OCTEON_LOCK_L2_MEMCPY + bool "Lock memcpy() in L2" + depends on CAVIUM_OCTEON_LOCK_L2 + default "y" + help + Lock the kernel's implementation of memcpy() into L2. + +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_STATIC + depends on CPU_CAVIUM_OCTEON diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile new file mode 100644 index 00000000000..1c2a7faf588 --- /dev/null +++ b/arch/mips/cavium-octeon/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the Cavium Octeon specific kernel interface routines +# under Linux. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2005-2008 Cavium Networks +# + +obj-y := setup.o serial.o octeon-irq.o csrc-octeon.o +obj-y += dma-octeon.o flash_setup.o +obj-y += octeon-memcpy.o + +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c new file mode 100644 index 00000000000..70fd92c3165 --- /dev/null +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -0,0 +1,58 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007 by Ralf Baechle + */ +#include <linux/clocksource.h> +#include <linux/init.h> + +#include <asm/time.h> + +#include <asm/octeon/octeon.h> +#include <asm/octeon/cvmx-ipd-defs.h> + +/* + * Set the current core's cvmcount counter to the value of the + * IPD_CLK_COUNT. We do this on all cores as they are brought + * on-line. This allows for a read from a local cpu register to + * access a synchronized counter. + * + */ +void octeon_init_cvmcount(void) +{ + unsigned long flags; + unsigned loops = 2; + + /* Clobber loops so GCC will not unroll the following while loop. */ + asm("" : "+r" (loops)); + + local_irq_save(flags); + /* + * Loop several times so we are executing from the cache, + * which should give more deterministic timing. + */ + while (loops--) + write_c0_cvmcount(cvmx_read_csr(CVMX_IPD_CLK_COUNT)); + local_irq_restore(flags); +} + +static cycle_t octeon_cvmcount_read(void) +{ + return read_c0_cvmcount(); +} + +static struct clocksource clocksource_mips = { + .name = "OCTEON_CVMCOUNT", + .read = octeon_cvmcount_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +void __init plat_time_init(void) +{ + clocksource_mips.rating = 300; + clocksource_set_clock(&clocksource_mips, mips_hpt_frequency); + clocksource_register(&clocksource_mips); +} diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c new file mode 100644 index 00000000000..01b1ef94b36 --- /dev/null +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -0,0 +1,32 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> + * Copyright (C) 2000, 2001 Ralf Baechle <ralf@gnu.org> + * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com> + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + * IP32 changes by Ilya. + * Cavium Networks: Create new dma setup for Cavium Networks Octeon based on + * the kernels original. + */ +#include <linux/types.h> +#include <linux/mm.h> + +#include <dma-coherence.h> + +dma_addr_t octeon_map_dma_mem(struct device *dev, void *ptr, size_t size) +{ + /* Without PCI/PCIe this function can be called for Octeon internal + devices such as USB. These devices all support 64bit addressing */ + mb(); + return virt_to_phys(ptr); +} + +void octeon_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr) +{ + /* Without PCI/PCIe this function can be called for Octeon internal + * devices such as USB. These devices all support 64bit addressing */ + return; +} diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c new file mode 100644 index 00000000000..553d36cbcc4 --- /dev/null +++ b/arch/mips/cavium-octeon/flash_setup.c @@ -0,0 +1,84 @@ +/* + * Octeon Bootbus flash setup + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007, 2008 Cavium Networks + */ +#include <linux/kernel.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/map.h> +#include <linux/mtd/partitions.h> + +#include <asm/octeon/octeon.h> + +static struct map_info flash_map; +static struct mtd_info *mymtd; +#ifdef CONFIG_MTD_PARTITIONS +static int nr_parts; +static struct mtd_partition *parts; +static const char *part_probe_types[] = { + "cmdlinepart", +#ifdef CONFIG_MTD_REDBOOT_PARTS + "RedBoot", +#endif + NULL +}; +#endif + +/** + * Module/ driver initialization. + * + * Returns Zero on success + */ +static int __init flash_init(void) +{ + /* + * Read the bootbus region 0 setup to determine the base + * address of the flash. + */ + union cvmx_mio_boot_reg_cfgx region_cfg; + region_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(0)); + if (region_cfg.s.en) { + /* + * The bootloader always takes the flash and sets its + * address so the entire flash fits below + * 0x1fc00000. This way the flash aliases to + * 0x1fc00000 for booting. Software can access the + * full flash at the true address, while core boot can + * access 4MB. + */ + /* Use this name so old part lines work */ + flash_map.name = "phys_mapped_flash"; + flash_map.phys = region_cfg.s.base << 16; + flash_map.size = 0x1fc00000 - flash_map.phys; + flash_map.bankwidth = 1; + flash_map.virt = ioremap(flash_map.phys, flash_map.size); + pr_notice("Bootbus flash: Setting flash for %luMB flash at " + "0x%08lx\n", flash_map.size >> 20, flash_map.phys); + simple_map_init(&flash_map); + mymtd = do_map_probe("cfi_probe", &flash_map); + if (mymtd) { + mymtd->owner = THIS_MODULE; + +#ifdef CONFIG_MTD_PARTITIONS + nr_parts = parse_mtd_partitions(mymtd, + part_probe_types, + &parts, 0); + if (nr_parts > 0) + add_mtd_partitions(mymtd, parts, nr_parts); + else + add_mtd_device(mymtd); +#else + add_mtd_device(mymtd); +#endif + } else { + pr_err("Failed to register MTD device for flash\n"); + } + } + return 0; +} + +late_initcall(flash_init); diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c new file mode 100644 index 00000000000..fc72984a5da --- /dev/null +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -0,0 +1,497 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2008 Cavium Networks + */ +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/hardirq.h> + +#include <asm/octeon/octeon.h> + +DEFINE_RWLOCK(octeon_irq_ciu0_rwlock); +DEFINE_RWLOCK(octeon_irq_ciu1_rwlock); +DEFINE_SPINLOCK(octeon_irq_msi_lock); + +static void octeon_irq_core_ack(unsigned int irq) +{ + unsigned int bit = irq - OCTEON_IRQ_SW0; + /* + * We don't need to disable IRQs to make these atomic since + * they are already disabled earlier in the low level + * interrupt code. + */ + clear_c0_status(0x100 << bit); + /* The two user interrupts must be cleared manually. */ + if (bit < 2) + clear_c0_cause(0x100 << bit); +} + +static void octeon_irq_core_eoi(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned int bit = irq - OCTEON_IRQ_SW0; + /* + * If an IRQ is being processed while we are disabling it the + * handler will attempt to unmask the interrupt after it has + * been disabled. + */ + if (desc->status & IRQ_DISABLED) + return; + + /* There is a race here. We should fix it. */ + + /* + * We don't need to disable IRQs to make these atomic since + * they are already disabled earlier in the low level + * interrupt code. + */ + set_c0_status(0x100 << bit); +} + +static void octeon_irq_core_enable(unsigned int irq) +{ + unsigned long flags; + unsigned int bit = irq - OCTEON_IRQ_SW0; + + /* + * We need to disable interrupts to make sure our updates are + * atomic. + */ + local_irq_save(flags); + set_c0_status(0x100 << bit); + local_irq_restore(flags); +} + +static void octeon_irq_core_disable_local(unsigned int irq) +{ + unsigned long flags; + unsigned int bit = irq - OCTEON_IRQ_SW0; + /* + * We need to disable interrupts to make sure our updates are + * atomic. + */ + local_irq_save(flags); + clear_c0_status(0x100 << bit); + local_irq_restore(flags); +} + +static void octeon_irq_core_disable(unsigned int irq) +{ +#ifdef CONFIG_SMP + on_each_cpu((void (*)(void *)) octeon_irq_core_disable_local, + (void *) (long) irq, 1); +#else + octeon_irq_core_disable_local(irq); +#endif +} + +static struct irq_chip octeon_irq_chip_core = { + .name = "Core", + .enable = octeon_irq_core_enable, + .disable = octeon_irq_core_disable, + .ack = octeon_irq_core_ack, + .eoi = octeon_irq_core_eoi, +}; + + +static void octeon_irq_ciu0_ack(unsigned int irq) +{ + /* + * In order to avoid any locking accessing the CIU, we + * acknowledge CIU interrupts by disabling all of them. This + * way we can use a per core register and avoid any out of + * core locking requirements. This has the side affect that + * CIU interrupts can't be processed recursively. + * + * We don't need to disable IRQs to make these atomic since + * they are already disabled earlier in the low level + * interrupt code. + */ + clear_c0_status(0x100 << 2); +} + +static void octeon_irq_ciu0_eoi(unsigned int irq) +{ + /* + * Enable all CIU interrupts again. We don't need to disable + * IRQs to make these atomic since they are already disabled + * earlier in the low level interrupt code. + */ + set_c0_status(0x100 << 2); +} + +static void octeon_irq_ciu0_enable(unsigned int irq) +{ + int coreid = cvmx_get_core_num(); + unsigned long flags; + uint64_t en0; + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + + /* + * A read lock is used here to make sure only one core is ever + * updating the CIU enable bits at a time. During an enable + * the cores don't interfere with each other. During a disable + * the write lock stops any enables that might cause a + * problem. + */ + read_lock_irqsave(&octeon_irq_ciu0_rwlock, flags); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 |= 1ull << bit; + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + read_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags); +} + +static void octeon_irq_ciu0_disable(unsigned int irq) +{ + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + unsigned long flags; + uint64_t en0; +#ifdef CONFIG_SMP + int cpu; + write_lock_irqsave(&octeon_irq_ciu0_rwlock, flags); + for_each_online_cpu(cpu) { + int coreid = cpu_logical_map(cpu); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + } + /* + * We need to do a read after the last update to make sure all + * of them are done. + */ + cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); + write_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags); +#else + int coreid = cvmx_get_core_num(); + local_irq_save(flags); + en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + local_irq_restore(flags); +#endif +} + +#ifdef CONFIG_SMP +static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) +{ + int cpu; + int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ + + write_lock(&octeon_irq_ciu0_rwlock); + for_each_online_cpu(cpu) { + int coreid = cpu_logical_map(cpu); + uint64_t en0 = + cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)); + if (cpumask_test_cpu(cpu, dest)) + en0 |= 1ull << bit; + else + en0 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0); + } + /* + * We need to do a read after the last update to make sure all + * of them are done. + */ + cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); + write_unlock(&octeon_irq_ciu0_rwlock); +} +#endif + +static struct irq_chip octeon_irq_chip_ciu0 = { + .name = "CIU0", + .enable = octeon_irq_ciu0_enable, + .disable = octeon_irq_ciu0_disable, + .ack = octeon_irq_ciu0_ack, + .eoi = octeon_irq_ciu0_eoi, +#ifdef CONFIG_SMP + .set_affinity = octeon_irq_ciu0_set_affinity, +#endif +}; + + +static void octeon_irq_ciu1_ack(unsigned int irq) +{ + /* + * In order to avoid any locking accessing the CIU, we + * acknowledge CIU interrupts by disabling all of them. This + * way we can use a per core register and avoid any out of + * core locking requirements. This has the side affect that + * CIU interrupts can't be processed recursively. We don't + * need to disable IRQs to make these atomic since they are + * already disabled earlier in the low level interrupt code. + */ + clear_c0_status(0x100 << 3); +} + +static void octeon_irq_ciu1_eoi(unsigned int irq) +{ + /* + * Enable all CIU interrupts again. We don't need to disable + * IRQs to make these atomic since they are already disabled + * earlier in the low level interrupt code. + */ + set_c0_status(0x100 << 3); +} + +static void octeon_irq_ciu1_enable(unsigned int irq) +{ + int coreid = cvmx_get_core_num(); + unsigned long flags; + uint64_t en1; + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + + /* + * A read lock is used here to make sure only one core is ever + * updating the CIU enable bits at a time. During an enable + * the cores don't interfere with each other. During a disable + * the write lock stops any enables that might cause a + * problem. + */ + read_lock_irqsave(&octeon_irq_ciu1_rwlock, flags); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 |= 1ull << bit; + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + read_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags); +} + +static void octeon_irq_ciu1_disable(unsigned int irq) +{ + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + unsigned long flags; + uint64_t en1; +#ifdef CONFIG_SMP + int cpu; + write_lock_irqsave(&octeon_irq_ciu1_rwlock, flags); + for_each_online_cpu(cpu) { + int coreid = cpu_logical_map(cpu); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + } + /* + * We need to do a read after the last update to make sure all + * of them are done. + */ + cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); + write_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags); +#else + int coreid = cvmx_get_core_num(); + local_irq_save(flags); + en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)); + local_irq_restore(flags); +#endif +} + +#ifdef CONFIG_SMP +static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) +{ + int cpu; + int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ + + write_lock(&octeon_irq_ciu1_rwlock); + for_each_online_cpu(cpu) { + int coreid = cpu_logical_map(cpu); + uint64_t en1 = + cvmx_read_csr(CVMX_CIU_INTX_EN1 + (coreid * 2 + 1)); + if (cpumask_test_cpu(cpu, dest)) + en1 |= 1ull << bit; + else + en1 &= ~(1ull << bit); + cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1); + } + /* + * We need to do a read after the last update to make sure all + * of them are done. + */ + cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); + write_unlock(&octeon_irq_ciu1_rwlock); +} +#endif + +static struct irq_chip octeon_irq_chip_ciu1 = { + .name = "CIU1", + .enable = octeon_irq_ciu1_enable, + .disable = octeon_irq_ciu1_disable, + .ack = octeon_irq_ciu1_ack, + .eoi = octeon_irq_ciu1_eoi, +#ifdef CONFIG_SMP + .set_affinity = octeon_irq_ciu1_set_affinity, +#endif +}; + +#ifdef CONFIG_PCI_MSI + +static void octeon_irq_msi_ack(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* These chips have PCI */ + cvmx_write_csr(CVMX_NPI_NPI_MSI_RCV, + 1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + } else { + /* + * These chips have PCIe. Thankfully the ACK doesn't + * need any locking. + */ + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_RCV0, + 1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + } +} + +static void octeon_irq_msi_eoi(unsigned int irq) +{ + /* Nothing needed */ +} + +static void octeon_irq_msi_enable(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* + * Octeon PCI doesn't have the ability to mask/unmask + * MSI interrupts individually. Instead of + * masking/unmasking them in groups of 16, we simple + * assume MSI devices are well behaved. MSI + * interrupts are always enable and the ACK is assumed + * to be enough. + */ + } else { + /* These chips have PCIe. Note that we only support + * the first 64 MSI interrupts. Unfortunately all the + * MSI enables are in the same register. We use + * MSI0's lock to control access to them all. + */ + uint64_t en; + unsigned long flags; + spin_lock_irqsave(&octeon_irq_msi_lock, flags); + en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + en |= 1ull << (irq - OCTEON_IRQ_MSI_BIT0); + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en); + cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + spin_unlock_irqrestore(&octeon_irq_msi_lock, flags); + } +} + +static void octeon_irq_msi_disable(unsigned int irq) +{ + if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) { + /* See comment in enable */ + } else { + /* + * These chips have PCIe. Note that we only support + * the first 64 MSI interrupts. Unfortunately all the + * MSI enables are in the same register. We use + * MSI0's lock to control access to them all. + */ + uint64_t en; + unsigned long flags; + spin_lock_irqsave(&octeon_irq_msi_lock, flags); + en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + en &= ~(1ull << (irq - OCTEON_IRQ_MSI_BIT0)); + cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en); + cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0); + spin_unlock_irqrestore(&octeon_irq_msi_lock, flags); + } +} + +static struct irq_chip octeon_irq_chip_msi = { + .name = "MSI", + .enable = octeon_irq_msi_enable, + .disable = octeon_irq_msi_disable, + .ack = octeon_irq_msi_ack, + .eoi = octeon_irq_msi_eoi, +}; +#endif + +void __init arch_init_irq(void) +{ + int irq; + +#ifdef CONFIG_SMP + /* Set the default affinity to the boot cpu. */ + cpumask_clear(irq_default_affinity); + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); +#endif + + if (NR_IRQS < OCTEON_IRQ_LAST) + pr_err("octeon_irq_init: NR_IRQS is set too low\n"); + + /* 0 - 15 reserved for i8259 master and slave controller. */ + + /* 17 - 23 Mips internal */ + for (irq = OCTEON_IRQ_SW0; irq <= OCTEON_IRQ_TIMER; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_core, + handle_percpu_irq); + } + + /* 24 - 87 CIU_INT_SUM0 */ + for (irq = OCTEON_IRQ_WORKQ0; irq <= OCTEON_IRQ_BOOTDMA; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu0, + handle_percpu_irq); + } + + /* 88 - 151 CIU_INT_SUM1 */ + for (irq = OCTEON_IRQ_WDOG0; irq <= OCTEON_IRQ_RESERVED151; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu1, + handle_percpu_irq); + } + +#ifdef CONFIG_PCI_MSI + /* 152 - 215 PCI/PCIe MSI interrupts */ + for (irq = OCTEON_IRQ_MSI_BIT0; irq <= OCTEON_IRQ_MSI_BIT63; irq++) { + set_irq_chip_and_handler(irq, &octeon_irq_chip_msi, + handle_percpu_irq); + } +#endif + set_c0_status(0x300 << 2); +} + +asmlinkage void plat_irq_dispatch(void) +{ + const unsigned long core_id = cvmx_get_core_num(); + const uint64_t ciu_sum0_address = CVMX_CIU_INTX_SUM0(core_id * 2); + const uint64_t ciu_en0_address = CVMX_CIU_INTX_EN0(core_id * 2); + const uint64_t ciu_sum1_address = CVMX_CIU_INT_SUM1; + const uint64_t ciu_en1_address = CVMX_CIU_INTX_EN1(core_id * 2 + 1); + unsigned long cop0_cause; + unsigned long cop0_status; + uint64_t ciu_en; + uint64_t ciu_sum; + + while (1) { + cop0_cause = read_c0_cause(); + cop0_status = read_c0_status(); + cop0_cause &= cop0_status; + cop0_cause &= ST0_IM; + + if (unlikely(cop0_cause & STATUSF_IP2)) { + ciu_sum = cvmx_read_csr(ciu_sum0_address); + ciu_en = cvmx_read_csr(ciu_en0_address); + ciu_sum &= ciu_en; + if (likely(ciu_sum)) + do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WORKQ0 - 1); + else + spurious_interrupt(); + } else if (unlikely(cop0_cause & STATUSF_IP3)) { + ciu_sum = cvmx_read_csr(ciu_sum1_address); + ciu_en = cvmx_read_csr(ciu_en1_address); + ciu_sum &= ciu_en; + if (likely(ciu_sum)) + do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WDOG0 - 1); + else + spurious_interrupt(); + } else if (likely(cop0_cause)) { + do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE); + } else { + break; + } + } +} diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S new file mode 100644 index 00000000000..88e0cddca20 --- /dev/null +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -0,0 +1,521 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Unified implementation of memcpy, memmove and the __copy_user backend. + * + * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. + * Copyright (C) 2002 Broadcom, Inc. + * memcpy/copy_user author: Mark Vandevoorde + * + * Mnemonic names for arguments to memcpy/__copy_user + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/regdef.h> + +#define dst a0 +#define src a1 +#define len a2 + +/* + * Spec + * + * memcpy copies len bytes from src to dst and sets v0 to dst. + * It assumes that + * - src and dst don't overlap + * - src is readable + * - dst is writable + * memcpy uses the standard calling convention + * + * __copy_user copies up to len bytes from src to dst and sets a2 (len) to + * the number of uncopied bytes due to an exception caused by a read or write. + * __copy_user assumes that src and dst don't overlap, and that the call is + * implementing one of the following: + * copy_to_user + * - src is readable (no exceptions when reading src) + * copy_from_user + * - dst is writable (no exceptions when writing dst) + * __copy_user uses a non-standard calling convention; see + * arch/mips/include/asm/uaccess.h + * + * When an exception happens on a load, the handler must + # ensure that all of the destination buffer is overwritten to prevent + * leaking information to user mode programs. + */ + +/* + * Implementation + */ + +/* + * The exception handler for loads requires that: + * 1- AT contain the address of the byte just past the end of the source + * of the copy, + * 2- src_entry <= src < AT, and + * 3- (dst - src) == (dst_entry - src_entry), + * The _entry suffix denotes values when __copy_user was called. + * + * (1) is set up up by uaccess.h and maintained by not writing AT in copy_user + * (2) is met by incrementing src by the number of bytes copied + * (3) is met by not doing loads between a pair of increments of dst and src + * + * The exception handlers for stores adjust len (if necessary) and return. + * These handlers do not need to overwrite any data. + * + * For __rmemcpy and memmove an exception is always a kernel bug, therefore + * they're not protected. + */ + +#define EXC(inst_reg,addr,handler) \ +9: inst_reg, addr; \ + .section __ex_table,"a"; \ + PTR 9b, handler; \ + .previous + +/* + * Only on the 64-bit kernel we can made use of 64-bit registers. + */ +#ifdef CONFIG_64BIT +#define USE_DOUBLE +#endif + +#ifdef USE_DOUBLE + +#define LOAD ld +#define LOADL ldl +#define LOADR ldr +#define STOREL sdl +#define STORER sdr +#define STORE sd +#define ADD daddu +#define SUB dsubu +#define SRL dsrl +#define SRA dsra +#define SLL dsll +#define SLLV dsllv +#define SRLV dsrlv +#define NBYTES 8 +#define LOG_NBYTES 3 + +/* + * As we are sharing code base with the mips32 tree (which use the o32 ABI + * register definitions). We need to redefine the register definitions from + * the n64 ABI register naming to the o32 ABI register naming. + */ +#undef t0 +#undef t1 +#undef t2 +#undef t3 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 + +#else + +#define LOAD lw +#define LOADL lwl +#define LOADR lwr +#define STOREL swl +#define STORER swr +#define STORE sw +#define ADD addu +#define SUB subu +#define SRL srl +#define SLL sll +#define SRA sra +#define SLLV sllv +#define SRLV srlv +#define NBYTES 4 +#define LOG_NBYTES 2 + +#endif /* USE_DOUBLE */ + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define LDFIRST LOADR +#define LDREST LOADL +#define STFIRST STORER +#define STREST STOREL +#define SHIFT_DISCARD SLLV +#else +#define LDFIRST LOADL +#define LDREST LOADR +#define STFIRST STOREL +#define STREST STORER +#define SHIFT_DISCARD SRLV +#endif + +#define FIRST(unit) ((unit)*NBYTES) +#define REST(unit) (FIRST(unit)+NBYTES-1) +#define UNIT(unit) FIRST(unit) + +#define ADDRMASK (NBYTES-1) + + .text + .set noreorder + .set noat + +/* + * A combined memcpy/__copy_user + * __copy_user sets len to 0 for success; else to an upper bound of + * the number of uncopied bytes. + * memcpy sets v0 to dst. + */ + .align 5 +LEAF(memcpy) /* a0=dst a1=src a2=len */ + move v0, dst /* return value */ +__memcpy: +FEXPORT(__copy_user) + /* + * Note: dst & src may be unaligned, len may be 0 + * Temps + */ + # + # Octeon doesn't care if the destination is unaligned. The hardware + # can fix it faster than we can special case the assembly. + # + pref 0, 0(src) + sltu t0, len, NBYTES # Check if < 1 word + bnez t0, copy_bytes_checklen + and t0, src, ADDRMASK # Check if src unaligned + bnez t0, src_unaligned + sltu t0, len, 4*NBYTES # Check if < 4 words + bnez t0, less_than_4units + sltu t0, len, 8*NBYTES # Check if < 8 words + bnez t0, less_than_8units + sltu t0, len, 16*NBYTES # Check if < 16 words + bnez t0, cleanup_both_aligned + sltu t0, len, 128+1 # Check if len < 129 + bnez t0, 1f # Skip prefetch if len is too short + sltu t0, len, 256+1 # Check if len < 257 + bnez t0, 1f # Skip prefetch if len is too short + pref 0, 128(src) # We must not prefetch invalid addresses + # + # This is where we loop if there is more than 128 bytes left +2: pref 0 |